16S analysis

sample.biom.silva <- paste0(devdir, "data/16s.emu.all_json_md.biom")
sample.tree.silva <- paste0(devdir, "data/16s.emu.tree.nwk")
biom.silva <- import_biom(sample.biom.silva, treefilename = sample.tree.silva)
colnames(tax_table(biom.silva)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
sampleid.silva <- sample_names(biom.silva)
labels.biom.silva <- sample_data(biom.silva)$Label
rank_names(biom.silva)
## [1] "Kingdom" "Phylum"  "Class"   "Order"   "Family"  "Genus"   "Species"
sample_variables(biom.silva)
## [1] "BarcodeSequence" "Culture_setup"   "Inoculum"        "Label"          
## [5] "PCR_amplicon"    "Primer"          "ProjectID"       "Time_point"
sample_names(biom.silva)
##  [1] "flaregas-1"  "flaregas-2"  "flaregas-3"  "flaregas-4"  "flaregas-5" 
##  [6] "flaregas-6"  "flaregas-7"  "flaregas-8"  "flaregas-9"  "flaregas-10"
## [11] "flaregas-11" "flaregas-12" "flaregas-13" "flaregas-14" "flaregas-15"
## [16] "flaregas-16" "flaregas-17" "flaregas-18" "flaregas-19" "flaregas-20"
## [21] "flaregas-21" "flaregas-22" "flaregas-23" "flaregas-24" "flaregas-25"
## [26] "flaregas-26" "flaregas-27" "flaregas-28" "flaregas-29" "flaregas-30"
knitr::kable(as.matrix(sample_data(biom.silva)))
BarcodeSequence Culture_setup Inoculum Label PCR_amplicon Primer ProjectID Time_point
flaregas-1 BC01 ocean Helsingor H-LC-t0 16S 27F+1492R FAS64881 0
flaregas-2 BC02 light+clean_gas Helsingor H-LC-t1 16S 27F+1492R FAS64881 7
flaregas-3 BC03 light+clean_gas Helsingor H-LC-t3 16S 27F+1492R FAS64881 14
flaregas-4 BC04 ocean Helsingor H-LC-t0 18S ss5+ss3 FAS64881 0
flaregas-5 BC05 light+clean_gas Helsingor H-LC-t1 18S ss5+ss3 FAS64881 7
flaregas-6 BC06 light+clean_gas Helsingor H-LC-t3 18S ss5+ss3 FAS64881 14
flaregas-7 BC07 ocean Helsingor H-LC-t0 18S EukA+EukB FAS64881 0
flaregas-8 BC08 light+clean_gas Helsingor H-LC-t1 18S EukA+EukB FAS64881 7
flaregas-9 BC09 light+clean_gas Helsingor H-LC-t3 18S EukA+EukB FAS64881 14
flaregas-10 BC10 ocean Helsingor H-LC-t0 16S+18S 27F+1492R++EukA+EukB FAS64881 0
flaregas-11 BC01 ocean Hornbaek-deep Deep-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS65981 0
flaregas-12 BC02 ocean Hornbaek-deep Deep-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS65981 0
flaregas-13 BC03 light+clean_gas Hornbaek-deep Deep-LC-t1 16S+18S 27F+1492R++EukA+EukB FAS65981 5
flaregas-14 BC04 light+clean_gas Hornbaek-deep Deep-LC-t2 16S+18S 27F+1492R++EukA+EukB FAS65981 7
flaregas-15 BC05 light+clean_gas Hornbaek-deep Deep-LC-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-16 BC06 light+clean_gas Hornbaek-deep Deep-LC-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-17 BC07 light+used_gas Hornbaek-deep Deep-LU-t1 16S+18S 27F+1492R++EukA+EukB FAS65981 5
flaregas-18 BC08 light+used_gas Hornbaek-deep Deep-LU-t2 16S+18S 27F+1492R++EukA+EukB FAS65981 7
flaregas-19 BC09 light+used_gas Hornbaek-deep Deep-LU-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-20 BC10 light+used_gas Hornbaek-deep Deep-LU-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-21 BC11 dark+clean_gas Hornbaek-deep Deep-DC-t1 16S+18S 27F+1492R++EukA+EukB FAS64830 5
flaregas-22 BC12 dark+clean_gas Hornbaek-deep Deep-DC-t2 16S+18S 27F+1492R++EukA+EukB FAS64830 7
flaregas-23 BC13 dark+clean_gas Hornbaek-deep Deep-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11
flaregas-24 BC14 dark+clean_gas Hornbaek-deep Deep-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11
flaregas-25 BC15 ocean Hornbaek-shallow Shallow-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS64830 0
flaregas-26 BC16 ocean Hornbaek-shallow Shallow-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS64830 0
flaregas-27 BC17 dark+clean_gas Hornbaek-shallow Shallow-DC-t1 16S+18S 27F+1492R++EukA+EukB FAS64830 5
flaregas-28 BC18 dark+clean_gas Hornbaek-shallow Shallow-DC-t2 16S+18S 27F+1492R++EukA+EukB FAS64830 7
flaregas-29 BC19 dark+clean_gas Hornbaek-shallow Shallow-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11
flaregas-30 BC20 dark+clean_gas Hornbaek-shallow Shallow-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11

Preprocessing data

Subset prokaryotes (16S)

#remove Chloroplast (order level) and Mitochondria (family level)
biom.filter.silva <- subset_taxa(biom.silva, Order!="Chloroplast")
biom.filter.silva <- subset_taxa(biom.filter.silva, Family!="Mitochondria")

#get prokaryotes (16S)
biom.16s <- subset_taxa(biom.filter.silva, Kingdom=="Bacteria")

#remove 18s specific samples
biom.16s <- prune_samples(!(sample_names(biom.16s)) %in% c("flaregas-4","flaregas-5","flaregas-6","flaregas-7","flaregas-8","flaregas-9"), biom.16s)
sampleid.16s <- sample_names(biom.16s)
label.16s <- sample_data(biom.16s)$Label

meta <- rownames_to_column(as.data.frame(as.matrix(biom.16s@sam_data))) %>% dplyr::rename("Sample"="rowname")

##convert counts to integers for alpha diversity measures
biom.16s.int <- biom.16s
otu_table(biom.16s.int) <- otu_table(matrix(as.integer(otu_table(biom.16s)), ncol = nsamples(biom.16s), dimnames = list(taxa_names(biom.16s), sample_names(biom.16s))), taxa_are_rows = taxa_are_rows(biom.16s))

Define Methanotrophs

#define methanotrophs
#https://doi.org/10.1007/978-3-319-74866-5_2
methanotrophs.family <- c("Methylococcaceae",
                   "Methylohalobiaceae",
                   "Methylomonadaceae")
methanotrophs.genus <- c("Methylobacterium-Methylorubrum",
                   "Methylocapsa",
                   "Methylocella",
                   "Methylocystis",
                   "Methyloferula",
                   "Methylosinus",
                   "Methylorosula",
                   "Methylovirgula",
                   "Candidatus_Methylacidiphilum",
                   "Methyloceanibacter")

tax <- data.frame(tax_table(biom.16s))
tax.clean <- tax %>% mutate(Phylum = if_else(Genus %in% methanotrophs.genus, "Methanotroph", Phylum))
tax.clean <- tax.clean %>% mutate(Phylum = if_else(Family %in% methanotrophs.family, "Methanotroph", Phylum))
tax_table(biom.16s) <- as.matrix(tax.clean)

Prevalence filtering

Remove all taxa that only occur in a single sample

biom.16s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1584 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 1584 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1584 tips and 1582 internal nodes ]
#tax_stats(biom.16s)
# Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(biom.16s),
              MARGIN = ifelse(taxa_are_rows(biom.16s), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})
prevdf = data.frame(Prevalence = prev0, TotalAbundance = taxa_sums(biom.16s), tax_table(biom.16s))
ggplot(prevdf, aes(Prevalence)) + geom_bar()

# Define prevalence threshold as 5% of total samples
prevalenceThreshold = 0.05 * nsamples(biom.16s)
prevalenceThreshold
## [1] 1.2
# Execute prevalence filter, using `prune_taxa()` function
biom.16s = prune_taxa((prev0 > prevalenceThreshold), biom.16s)
biom.16s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 978 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 978 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 978 tips and 976 internal nodes ]
#tax_stats(biom.16s)

#pseq <- core(pseq.comp, detection = 1/1000, prevalence = 6/30) #table(meta$Culture_setup) -> minimum no. samples of dark-light-ocean: 6
# Merge rare taxa to speed up examples
#pseq <- aggregate_rare(pseq.comp, level = "Genus", detection = 0, prevalence = 6/30)

Normalization and transformation

Relative abundance

# Transform to relative abundance. Save as new object.
biom.16s.rel <- biom.16s %>% transform_sample_counts(function(x) {x / sum(x)})
#alternatively run: biom.16s.rel <- microbiome::transform(biom.16s, "compositional")

# visualize relative abundance distribution
d.rel.melt <- melt(otu_table(biom.16s.rel), id.vars = colnames(otu_table(biom.16s.rel))) %>% dplyr::rename("Sample" = "Var2", "Relative abundance" = "value")
d.rel.melt <- inner_join(d.rel.melt, meta, by = "Sample")
pdf(paste0(wdir,"fig/rel_abundance-distr-16s.pdf"), width = 14, height = 10)
ggplot(d.rel.melt, aes(`Relative abundance`, Sample, fill=Inoculum)) + geom_boxplot(outliers = F) + coord_flip() + facet_wrap(~ Culture_setup, ncol = 2, scales = "free_x") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
dev.off()

Centered log-ratio transformation

# load the library zCompositions to perform 0 replacement
library(zCompositions)

# we are using the Count Zero Multiplicative approach
# z.warning is threshold used to identify individual rows or columns including an excess of zeros/unobserved values
d.n0 <- cmultRepl(otu_table(biom.16s), method="CZM", label=0, z.warning=0.99)
dim(d.n0)

# generate the centered log-ratio transformed data
# samples by row
d.clr <- apply(d.n0, 2, function(x) log(x) - mean(log(x)))
biom.16s.clr <- biom.16s
otu_table(biom.16s.clr) <- otu_table(d.clr, taxa_are_rows = TRUE)

# check if microbiome::transform gives us the same
#pseq.comp <- microbiome::transform(biom.16s, "clr")
#d.clr.melt <- melt(otu_table(pseq.comp), id.vars = colnames(otu_table(pseq.comp))) %>% dplyr::rename("Sample" = "Var2", "clr" = "value")

# visualize clr distribution
d.clr.melt <- melt(otu_table(biom.16s.clr), id.vars = colnames(otu_table(biom.16s.clr))) %>% dplyr::rename("Sample" = "Var2", "clr" = "value")
d.clr.melt <- inner_join(d.clr.melt, meta, by = "Sample")
pdf(paste0(wdir,"fig/clr-distr-16s.pdf"), width = 14, height = 10)
ggplot(d.clr.melt, aes(clr, Sample, fill=Inoculum)) + geom_boxplot(outliers = F) + coord_flip() + facet_wrap(~ Culture_setup, ncol = 2, scales = "free_x") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
dev.off()

Most abundant genera

Maximal relative abundances (>3%) for each Culture setup aggregated for each genus

  temp.16s <- merge_samples(biom.16s, "Culture_setup") %>% tax_glom(taxrank = "Genus") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Abundance>0.03) %>% arrange(Genus) %>% dplyr::arrange(desc(Abundance))
  knitr::kable(as.matrix(temp.16s[,c(2,3,13:17)]))
Sample Abundance Phylum Class Order Family Genus
light+clean_gas 0.33453718 Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Aureispira
dark+clean_gas 0.32764094 Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Marivita
ocean 0.27227980 Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Lentibacter
light+used_gas 0.26110341 Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Aureispira
light+used_gas 0.21513669 Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Marivita
light+clean_gas 0.14536840 Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Marivita
light+used_gas 0.13484422 Cyanobacteria Cyanobacteriia Cyanobacteriales Geitlerinemaceae Geitlerinema_PCC-7105
dark+clean_gas 0.12307436 Proteobacteria Gammaproteobacteria Nitrosococcales Methylophagaceae Methylophaga
light+clean_gas 0.10873896 Cyanobacteria Cyanobacteriia Cyanobacteriales Geitlerinemaceae Geitlerinema_PCC-7105
ocean 0.09912371 Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae NS3a_marine_group
dark+clean_gas 0.08184165 Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylomicrobium
light+used_gas 0.07883621 Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Saprospiraceae_uncultured
ocean 0.05321352 Bacteroidota Bacteroidia Flavobacteriales Cryomorphaceae Cryomorphaceae_uncultured
ocean 0.05241535 Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Planktomarina
light+clean_gas 0.04592369 Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Flavobacterium
light+clean_gas 0.04436378 Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Saprospiraceae_uncultured
light+used_gas 0.04364331 Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Lewinella
light+clean_gas 0.03774251 Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Lewinella
light+used_gas 0.03367726 Proteobacteria Alphaproteobacteria Rhodospirillales Thalassospiraceae Thalassospira
light+clean_gas 0.03128879 Proteobacteria Alphaproteobacteria Rhodospirillales Thalassospiraceae Thalassospira

Community typing with Dirichlet Multinomial Mixtures

set.seed(12345)

#https://microbiome.github.io/tutorials/DMM.html
library(DirichletMultinomial)

# prefilter relative abundances for prevalent taxa
biom.16s.rel.genus <- biom.16s.rel %>% tax_glom(taxrank = "Genus")
biom.16s.genus <- biom.16s %>% tax_glom(taxrank = "Genus")
taxa <- core_members(biom.16s.rel.genus, detection = 1/200, prevalence = 6/30) #table(meta$Culture_setup) -> minimum no. samples of dark-light-ocean: 6
pseq <- prune_taxa(taxa, biom.16s.genus)
dat <- abundances(pseq)
#rownames(dat) <- paste(tax_table(pseq)[,"Genus"], tax_table(pseq)[,"Species"], sep = "::")
rownames(dat) <- tax_table(pseq)[,"Genus"]
count <- as.matrix(t(dat))

# fit the DMM model
fit <- lapply(1:7, dmn, count = count, verbose=TRUE, seed = 1234567)
## dmn, k=1
##   Soft kmeans
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=2
##   Soft kmeans
##     iteration 10 change 0.000000
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=3
##   Soft kmeans
##     iteration 10 change 0.000007
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=4
##   Soft kmeans
##     iteration 10 change 0.012249
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=5
##   Soft kmeans
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=6
##   Soft kmeans
##     iteration 10 change 0.000001
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=7
##   Soft kmeans
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
# Check model fit with different number of mixture components using standard information criteria
lplc <- base::sapply(fit, DirichletMultinomial::laplace) # AIC / BIC / Laplace
aic  <- base::sapply(fit, DirichletMultinomial::AIC) # AIC / BIC / Laplace
bic  <- base::sapply(fit, DirichletMultinomial::BIC) # AIC / BIC / Laplace
plot(lplc, type="b", xlab="Number of Dirichlet Components", ylab="Model Fit", ylim=c(min(c(lplc,aic,bic), na.rm = T), max(c(lplc,aic,bic), na.rm = T))); lines(aic, type="b", lty = 2); lines(bic, type="b", lty = 3)

# pick the optimal model
best <- fit[[which.min(unlist(lplc))]]

# mixture parameters pi and theta
mixturewt(best)
##          pi    theta
## 1 0.3750003 19.58217
## 2 0.3749997 18.62693
## 3 0.2500000 65.94614
# sample-component assignments
ass <- apply(mixture(best), 1, which.max)
temp.meta <- meta
temp.meta$cluster <- ass
knitr::kable(as.matrix(temp.meta %>% dplyr::select(Culture_setup, Inoculum, Time_point, cluster)))
Culture_setup Inoculum Time_point cluster
ocean Helsingor 0 3
light+clean_gas Helsingor 7 1
light+clean_gas Helsingor 14 2
ocean Helsingor 0 3
ocean Hornbaek-deep 0 3
ocean Hornbaek-deep 0 3
light+clean_gas Hornbaek-deep 5 1
light+clean_gas Hornbaek-deep 7 1
light+clean_gas Hornbaek-deep 11 1
light+clean_gas Hornbaek-deep 11 1
light+used_gas Hornbaek-deep 5 1
light+used_gas Hornbaek-deep 7 1
light+used_gas Hornbaek-deep 11 1
light+used_gas Hornbaek-deep 11 1
dark+clean_gas Hornbaek-deep 5 2
dark+clean_gas Hornbaek-deep 7 2
dark+clean_gas Hornbaek-deep 11 2
dark+clean_gas Hornbaek-deep 11 2
ocean Hornbaek-shallow 0 3
ocean Hornbaek-shallow 0 3
dark+clean_gas Hornbaek-shallow 5 2
dark+clean_gas Hornbaek-shallow 7 2
dark+clean_gas Hornbaek-shallow 11 2
dark+clean_gas Hornbaek-shallow 11 2
# Contribution of each taxonomic group to each component
for (k in seq(ncol(fitted(best)))) {
  d <- melt(fitted(best))
  colnames(d) <- c("OTU", "cluster", "value")
  d <- subset(d, cluster == k) %>%
     # Arrange OTUs by assignment strength
     arrange(value) %>%
     mutate(OTU = factor(OTU, levels = unique(OTU))) %>%
     # Only show the most important drivers
     filter(abs(value) > quantile(abs(value), 0.8))     

  p <- ggplot(d, aes(x = OTU, y = value)) +
       geom_bar(stat = "identity") +
       coord_flip() +
       labs(title = paste("Top drivers: community type", k))
  print(p)
}

Cyanobacteria and Methanotrophs

Subset cyanobacteria and methanotrophs

#get cyanobacteria
biom.cyano <- subset_taxa(biom.16s, Phylum=="Cyanobacteria")
biom.cyano.clr <- subset_taxa(biom.16s.clr, Phylum=="Cyanobacteria")
biom.cyano.rel <- subset_taxa(biom.16s.rel, Phylum=="Cyanobacteria")
biom.cyano.rel
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 53 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 53 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 53 tips and 52 internal nodes ]
tax_stats(biom.cyano.rel)
## [1] "biom.cyano.rel: No. reads 1.7280778219388, No. species 53, No. genus 19, No. family 12"
#get methanotrophs
biom.mob <- subset_taxa(biom.16s, Phylum=="Methanotroph")
biom.mob.clr <- subset_taxa(biom.16s.clr, Phylum=="Methanotroph")
biom.mob.rel <- subset_taxa(biom.16s.rel, Phylum=="Methanotroph")
biom.mob.rel
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 22 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 22 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 22 tips and 21 internal nodes ]
tax_stats(biom.mob.rel)
## [1] "biom.mob.rel: No. reads 1.34902775524237, No. species 22, No. genus 8, No. family 1"

Cyanobacteria

cyanobac <- tax_glom(biom.cyano.rel, taxrank = "Phylum")
p1 <- plot_bar(cyanobac) + geom_bar(stat = "identity", position = "stack", fill = "dark green") + ggtitle("Cyanobacteria relative abundance") + theme(axis.text.x =element_text(angle=90,hjust=1)) + scale_x_discrete(limits = sampleid.16s)
cyanobac <- tax_glom(biom.cyano.clr, taxrank = "Phylum")
p2 <- plot_bar(cyanobac) + geom_bar(stat = "identity", position = "stack", fill = "dark green") + ggtitle("Cyanobacteria centered log-ratio") + theme(axis.text.x =element_text(angle=90,hjust=1)) + scale_x_discrete(limits = sampleid.16s)
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))

#relative abundance of all bacteria
taxa.cyano <- biom.cyano.rel %>% tax_glom(taxrank = "Family") %>%
                                psmelt() %>%
                                filter(Abundance>0.001) %>%
                                arrange(Family)
no.taxa <- length(levels(as.factor(taxa.cyano$Family)))
getPalette = colorRampPalette(brewer.pal(8, "Set1"))
my.palette = getPalette(no.taxa)
#taxa.cyano$Family <- fct_reorder2(taxa.cyano$Family, taxa.cyano$Abundance, taxa.cyano$Time_point)
taxa.cyano$Culture_setup <- factor(taxa.cyano$Culture_setup, levels = c("ocean", "light+clean_gas", "light+used_gas", "dark+clean_gas"))
taxa.cyano$Time_point <- factor(taxa.cyano$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/family-over-time-cyano-rel2bact.pdf"), width = 13, height = 13)
ggplot(taxa.cyano, aes(x = Time_point, y = Abundance, fill = Family)) +  geom_bar(stat = "identity") + facet_grid(vars(Inoculum),vars(Culture_setup), scales = "free_x", space = "free_x") + scale_fill_manual(values = my.palette) + ylab("Relative abundance")
dev.off()

#relative abundance of all cyanobacteria
#set counts to zero if relative abundance is less than 0.001
filterfun1 = function(x){
  x[(x / sum(x)) < (0.0001)] <- 0
  return(x)
}
biom.16s.sub <- transform_sample_counts(biom.16s, fun = filterfun1)
biom.cyano.sub <- subset_taxa(biom.16s.sub, Phylum=="Cyanobacteria")
#samples with the same label should be merged for plotting of relative abundances for Culture_setup-Inoculum-Time_point categories
biom.cyano.nr <- biom.cyano.sub
sample_data(biom.cyano.nr) <- sample_data(biom.cyano.nr)[,c("Label", "Culture_setup", "Inoculum", "Time_point")]
biom.cyano.nr <- merge_samples(biom.cyano.nr, "Label") #merge replicates
temp.cyano.nr.sample_data <- sample_data(biom.cyano)[,c("Label", "Culture_setup", "Inoculum", "Time_point")]
#https://rdrr.io/github/kstagaman/phyloseqCompanion/src/R/sample_data_frame.R
sample.data.frame <- function(ps) {
  return(as(phyloseq::sample_data(ps), "data.frame"))
}
temp.cyano.nr.sample_data <- sample.data.frame(temp.cyano.nr.sample_data) %>% distinct()
rownames(temp.cyano.nr.sample_data) <- temp.cyano.nr.sample_data$Label
sample_data(biom.cyano.nr) <- temp.cyano.nr.sample_data
taxa.cyano <- biom.cyano.nr %>% tax_glom(taxrank = "Family") %>%
                                transform_sample_counts(function(x) {x / sum(x)}) %>%
                                psmelt() %>%
                                filter(Abundance>0.01) %>%
                                arrange(Family)
no.taxa <- length(levels(as.factor(taxa.cyano$Family)))
getPalette = colorRampPalette(brewer.pal(8, "Set1"))
my.palette = getPalette(no.taxa)
#taxa.cyano$Family <- fct_reorder2(taxa.cyano$Family, taxa.cyano$Abundance, taxa.cyano$Time_point)
taxa.cyano$Culture_setup <- factor(taxa.cyano$Culture_setup, levels = c("ocean", "light+clean_gas", "light+used_gas", "dark+clean_gas"))
taxa.cyano$Time_point <- factor(taxa.cyano$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/family-over-time-cyano-rel2cyano.pdf"), width = 13, height = 13)
ggplot(taxa.cyano, aes(x = Time_point, y = Abundance, fill = Family)) +  geom_bar(stat = "identity") + facet_grid(vars(Inoculum),vars(Culture_setup), scales = "free_x", space = "free_x") + scale_fill_manual(values = my.palette) + ylab("Relative abundance")
dev.off()


`

Methanotroph

mob <- tax_glom(biom.mob.rel, taxrank = "Phylum")
p1 <- plot_bar(mob) + geom_bar(stat = "identity", position = "stack", fill = "dark green") + ggtitle("Methanotroph relative abundance") + theme(axis.text.x =element_text(angle=90,hjust=1)) + scale_x_discrete(limits = sampleid.16s)
mob <- tax_glom(biom.mob.clr, taxrank = "Phylum")
p2 <- plot_bar(mob) + geom_bar(stat = "identity", position = "stack", fill = "dark green") + ggtitle("Methanotroph centered log-ratio") + theme(axis.text.x =element_text(angle=90,hjust=1)) + scale_x_discrete(limits = sampleid.16s)
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))

#relative abundance of all bacteria
taxa.mob <- biom.mob.rel %>% tax_glom(taxrank = "Genus") %>%
                                psmelt() %>%
                                filter(Abundance>0.0001) %>%
                                arrange(Genus)
no.taxa <- length(levels(as.factor(taxa.mob$Genus)))
getPalette = colorRampPalette(brewer.pal(8, "Set2"))
my.palette = getPalette(no.taxa)
#taxa.mob$Family <- fct_reorder2(taxa.mob$Family, taxa.mob$Abundance, taxa.mob$Time_point)
taxa.mob$Culture_setup <- factor(taxa.mob$Culture_setup, levels = c("ocean", "light+clean_gas", "light+used_gas", "dark+clean_gas"))
taxa.mob$Time_point <- factor(taxa.mob$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/genus-over-time-mob-rel2mob.pdf"), width = 13, height = 13)
ggplot(taxa.mob, aes(x = Time_point, y = Abundance, fill = Genus)) +  geom_bar(stat = "identity") + facet_grid(vars(Inoculum),vars(Culture_setup), scales = "free_x", space = "free_x") + scale_fill_manual(values = my.palette) + ylab("Relative abundance")
dev.off()
## png 
##   2
#relative abundance of all methanotrophs
#set counts to zero if relative abundance is less than 0.001
filterfun1 = function(x){
  x[(x / sum(x)) < (0.0001)] <- 0
  return(x)
}
biom.16s.sub <- transform_sample_counts(biom.16s, fun = filterfun1)
biom.mob.sub <- subset_taxa(biom.16s.sub, Phylum=="Methanotroph")
#samples with the same label should be merged for plotting of relative abundances for Culture_setup-Inoculum-Time_point categories
biom.mob.nr <- biom.mob.sub
sample_data(biom.mob.nr) <- sample_data(biom.mob.nr)[,c("Label", "Culture_setup", "Inoculum", "Time_point")]
biom.mob.nr <- merge_samples(biom.mob.nr, "Label") #merge replicates
temp.mob.nr.sample_data <- sample_data(biom.mob)[,c("Label", "Culture_setup", "Inoculum", "Time_point")]
#https://rdrr.io/github/kstagaman/phyloseqCompanion/src/R/sample_data_frame.R
sample.data.frame <- function(ps) {
  return(as(phyloseq::sample_data(ps), "data.frame"))
}
temp.mob.nr.sample_data <- sample.data.frame(temp.mob.nr.sample_data) %>% distinct()
rownames(temp.mob.nr.sample_data) <- temp.mob.nr.sample_data$Label
sample_data(biom.mob.nr) <- temp.mob.nr.sample_data
taxa.mob <- biom.mob.nr %>% tax_glom(taxrank = "Genus") %>%
                                transform_sample_counts(function(x) {x / sum(x)}) %>%
                                psmelt() %>%
                                filter(Abundance>0.01) %>%
                                arrange(Genus)
no.taxa <- length(levels(as.factor(taxa.mob$Genus)))
getPalette = colorRampPalette(brewer.pal(8, "Set2"))
my.palette = getPalette(no.taxa)
#taxa.mob$Family <- fct_reorder2(taxa.mob$Family, taxa.mob$Abundance, taxa.mob$Time_point)
taxa.mob$Culture_setup <- factor(taxa.mob$Culture_setup, levels = c("ocean", "light+clean_gas", "light+used_gas", "dark+clean_gas"))
taxa.mob$Time_point <- factor(taxa.mob$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/genus-over-time-mob-rel2all.pdf"), width = 13, height = 13)
ggplot(taxa.mob, aes(x = Time_point, y = Abundance, fill = Genus)) +  geom_bar(stat = "identity") + facet_grid(vars(Inoculum),vars(Culture_setup), scales = "free_x", space = "free_x") + scale_fill_manual(values = my.palette) + ylab("Relative abundance")
dev.off()
## png 
##   2
mob.light <- taxa.mob %>% filter(Culture_setup=="light+clean_gas" & Inoculum=="Hornbaek-deep" & Time_point==11) %>% group_by(Genus) %>% summarise(sum_relative_abundance = sum(Abundance)) %>% arrange(desc(sum_relative_abundance))
knitr::kable(as.matrix(mob.light))
Genus sum_relative_abundance
Methylomicrobium 0.6426891
Methylomonas 0.3573109


Preprocess minimap2 abundancies

Different statistical methods (alpha and beta diversity, DEA) require raw read counts instead of probabilities as calculated by Emu.

#get minimap2 filtered raw counts
sample.biom <- paste0(devdir, "data/16s.minimap2.all_json_md.biom")
sample.tree <- paste0(devdir, "data/16s.minimap2.tree.nwk")
biom.m <- import_biom(sample.biom, treefilename = sample.tree)
colnames(tax_table(biom.m)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
tax.m <- data.frame(tax_table(biom.m))
tax.clean.m <- data.frame(row.names = row.names(tax.m),
                        Kingdom = str_replace(tax.m[,1], "d__", ""),
                        Phylum = str_replace(tax.m[,2], "p__", ""),
                        Class = str_replace(tax.m[,3], "c__", ""),
                        Order = str_replace(tax.m[,4], "o__", ""),
                        Family = str_replace(tax.m[,5], "f__", ""),
                        Genus = str_replace(tax.m[,6], "g__", ""),
                        Species = str_replace(tax.m[,7], "s__", ""),
                        stringsAsFactors = FALSE)
tax_table(biom.m) <- as.matrix(tax.clean.m)
biom.m <- fix_duplicate_tax(biom.m)

#filter for bacteria
biom.filter.silva.m <- subset_taxa(biom.m, Order!="Chloroplast")
biom.filter.silva.m <- subset_taxa(biom.filter.silva.m, Family!="Mitochondria")
biom.16s.m <- subset_taxa(biom.filter.silva.m, Kingdom=="Bacteria")
biom.16s.m <- prune_samples(!(sample_names(biom.16s.m)) %in% c("flaregas-4","flaregas-5","flaregas-6","flaregas-7","flaregas-8","flaregas-9"), biom.16s.m)

#replace phylum to methanotrophs
tax <- data.frame(tax_table(biom.16s.m))
tax.clean <- tax %>% mutate(Phylum = if_else(Genus %in% methanotrophs.genus, "Methanotroph", Phylum))
tax.clean <- tax.clean %>% mutate(Phylum = if_else(Family %in% methanotrophs.family, "Methanotroph", Phylum))
tax_table(biom.16s.m) <- as.matrix(tax.clean)

#preprocess phyloseq object
biom.16s.m.unfiltered <- biom.16s.m
prev0.m = apply(X = otu_table(biom.16s.m),
              MARGIN = ifelse(taxa_are_rows(biom.16s.m), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})
prevdf.m = data.frame(Prevalence = prev0.m, TotalAbundance = taxa_sums(biom.16s.m), tax_table(biom.16s.m))
ggplot(prevdf.m, aes(Prevalence)) + geom_bar()

prevalenceThreshold.m = 0.05 * nsamples(biom.16s.m)
biom.16s.m = prune_taxa((prev0.m > prevalenceThreshold.m), biom.16s.m)
biom.16s.m
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 9641 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 9641 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 9641 tips and 9639 internal nodes ]
#tax_stats(biom.16s.m)

#centered log-ratio transformation
d.n0 <- cmultRepl(otu_table(biom.16s.m), method="CZM", label=0, z.warning=0.99)
## No. adjusted imputations:  182271
dim(d.n0)
## [1] 9641   24
d.clr <- apply(d.n0, 2, function(x) log(x) - mean(log(x)))
biom.16s.m.clr <- biom.16s.m
otu_table(biom.16s.m.clr) <- otu_table(d.clr, taxa_are_rows = TRUE)

#transform to relative abundance
biom.16s.m.rel <- biom.16s.m %>% transform_sample_counts(function(x) {x / sum(x)})

#genus
#get genus counts
biom.16s.m.genus <- biom.16s.m %>% tax_glom(taxrank = "Genus")

#filter genera that exist also running Emu on minimap2 counts
biom.16s.genus.minimap2emu <- tax_table(biom.16s.m.genus) %>% as.data.frame() %>% dplyr::select(Genus) %>% rownames_to_column(var = "SpeciesID") %>% inner_join(tax_table(biom.16s.genus) %>% as.data.frame() %>% dplyr::select(Genus), by = "Genus")

Alpha diversity measure

This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).

  • Observed .. counts up the number of different taxa you observe in a sample at a given taxonomic level
  • Chao0 .. species richness (similar to observed)
  • Shannon .. estimator for both species richness and evenness; higher means higher uncertainty of predicting which species you would see next if you were to look at another read from this sample
  • Simpson .. Gini-Simpson Index; similar idea to Shannon; based on the probability that two entities (microbes, or reads) taken from the sample at random are of different types

Sample richness in the different culture setups and different time points (emu)

#https://docs.onecodex.com/en/articles/4136553-alpha-diversity
alpha_meas = c("Observed", "Chao1", "Shannon", "Simpson")

p <- plot_richness(biom.16s.int, "Culture_setup", "Time_point", measures=alpha_meas)
pdf(paste0(wdir, "fig/alpha-diversity-cultures-16s-emu.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Culture_setup, y=value, color=NULL), alpha=0.1)
dev.off()
## png 
##   2
#boxplot_alpha(biom.16s.int, index = "chao1", x_var = "Culture_setup") + theme_minimal() + theme(legend.position = "none")

#https://microbiome.github.io/tutorials/Alphadiversity.html
tab <-microbiome::alpha(biom.16s.int, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
knitr::kable(as.matrix(tab))
observed chao1 diversity_inverse_simpson diversity_gini_simpson diversity_shannon diversity_fisher diversity_coverage evenness_camargo evenness_pielou evenness_simpson evenness_evar evenness_bulla dominance_dbp dominance_dmn dominance_absolute dominance_relative dominance_simpson dominance_core_abundance dominance_gini rarity_log_modulo_skewness rarity_low_abundance rarity_rare_abundance
flaregas-1 364 364 23.253857 0.9569964 4.446192 54.875603 12 0.4496586 0.7539556 0.0638842 0.4972124 0.4198321 0.1641734 0.2377957 6837 0.1641734 0.0430036 0.2410614 0.9343243 2.061228 0.2384680 0.3259455
flaregas-2 205 205 18.365758 0.9455508 4.043574 31.805434 8 0.9390088 0.7596405 0.0895891 0.5112685 0.4449620 0.1812000 0.2720500 3624 0.1812000 0.0544492 0.1379000 0.9596532 2.061093 0.1390500 0.5427000
flaregas-3 71 71 25.615507 0.9609611 3.716464 11.734826 10 0.9940452 0.8718610 0.3607818 0.5741982 0.5767494 0.1161667 0.1860278 577 0.1161667 0.0390389 0.2158244 0.9792600 2.061423 0.0000000 0.6746527
flaregas-10 144 144 18.012854 0.9444841 3.950388 22.743772 10 0.8833613 0.7948765 0.1250893 0.5435716 0.4969869 0.1970528 0.2659508 2514 0.1970528 0.0555159 0.2497257 0.9669065 2.061357 0.0723468 0.2570936
flaregas-11 206 206 46.140687 0.9783272 4.620210 33.537379 22 0.9002330 0.8671767 0.2239839 0.5779056 0.5624584 0.0993126 0.1437657 1546 0.0993126 0.0216728 0.1593756 0.9436860 2.061391 0.1217319 0.3237618
flaregas-12 584 584 56.467394 0.9822907 5.240850 92.057685 32 0.6257597 0.8227522 0.0966907 0.5335078 0.4814331 0.0929103 0.1368791 4858 0.0929103 0.0177093 0.1365158 0.8728897 2.061315 0.3495324 0.4301069
flaregas-13 133 133 5.216084 0.8082853 2.844222 18.169120 3 0.9030886 0.5815989 0.0392187 0.3736430 0.3075903 0.4162777 0.4942386 11416 0.4162777 0.1917147 0.2092328 0.9850484 2.060952 0.0921091 0.1503792
flaregas-14 235 235 4.585795 0.7819353 2.753450 30.123017 2 0.7927532 0.5043331 0.0195140 0.3547464 0.2308487 0.4415997 0.5661598 32496 0.4415997 0.2180647 0.6596138 0.9820239 2.060846 0.1150883 0.1154280
flaregas-15 57 57 4.161387 0.7596955 2.295168 8.531925 2 1.0000000 0.5676821 0.0730068 0.4680488 0.3220544 0.4242380 0.6595494 2881 0.4242380 0.2403045 0.7754381 0.9919723 2.060550 0.0038286 0.0942424
flaregas-16 165 165 4.544816 0.7799691 2.788147 23.651645 2 0.7988789 0.5460589 0.0275443 0.4723405 0.3081889 0.4356846 0.5965224 11025 0.4356846 0.2200309 0.6885201 0.9805619 2.061274 0.1189093 0.1890535
flaregas-17 166 166 6.736979 0.8515655 2.973364 22.442065 3 0.8852945 0.5816453 0.0405842 0.3694369 0.2757042 0.3312828 0.4893099 12117 0.3312828 0.1484345 0.1825514 0.9831067 2.061229 0.1046861 0.1589567
flaregas-18 225 225 4.870536 0.7946838 3.019310 31.981029 2 0.8099476 0.5574694 0.0216468 0.4621243 0.3121526 0.4322940 0.5455873 15694 0.4322940 0.2053162 0.6230443 0.9738634 2.061262 0.1442541 0.1886018
flaregas-19 130 130 4.618498 0.7834794 2.620662 18.450632 2 0.6299565 0.5383962 0.0355269 0.4856388 0.3100489 0.3501701 0.6525232 7411 0.3501701 0.2165206 0.7419675 0.9840085 2.061261 0.0921376 0.1169911
flaregas-20 119 119 5.459180 0.8168223 2.861293 18.036618 2 0.6244773 0.5987067 0.0458755 0.5088171 0.3595004 0.3576836 0.5847843 4725 0.3576836 0.1831777 0.6996215 0.9824050 2.061229 0.0790310 0.1140045
flaregas-21 239 239 28.818345 0.9652999 4.357086 36.843558 14 0.9215666 0.7956022 0.1205789 0.4847310 0.4665360 0.1080745 0.2086128 2610 0.1080745 0.0347001 0.0801242 0.9508478 2.061303 0.1517184 0.2709731
flaregas-22 88 88 8.108700 0.8766757 3.195895 13.604011 4 0.9961611 0.7137938 0.0921443 0.4947832 0.4369549 0.3206944 0.4221106 2808 0.3206944 0.1233243 0.1146642 0.9829127 2.061244 0.0220420 0.1657149
flaregas-23 246 246 28.973900 0.9654862 4.460298 38.573434 16 0.8807671 0.8101780 0.1177801 0.5087523 0.4917949 0.1491681 0.1946247 3380 0.1491681 0.0345138 0.1389293 0.9456512 2.061378 0.1621872 0.3472792
flaregas-24 401 401 37.387162 0.9732529 4.798996 61.241357 20 0.7496266 0.8006385 0.0932348 0.4638321 0.4535202 0.1302554 0.1642840 5558 0.1302554 0.0267471 0.1326459 0.9216715 2.061103 0.2276307 0.4183033
flaregas-25 415 415 59.882167 0.9833005 5.184550 68.941248 36 0.6181893 0.8600383 0.1442944 0.6065978 0.5517742 0.0958120 0.1316840 2711 0.0958120 0.0166995 0.1515462 0.8895443 2.061407 0.3208694 0.4553101
flaregas-26 414 414 66.395387 0.9849387 5.217768 68.670111 38 0.6336633 0.8658951 0.1603753 0.6058342 0.5542743 0.0852522 0.1238530 2425 0.0852522 0.0150613 0.1263139 0.8883371 2.061407 0.3275444 0.4586746
flaregas-27 223 223 12.262297 0.9184492 3.740517 32.020001 6 0.8737199 0.6917696 0.0549879 0.4347468 0.3750955 0.2327347 0.3663969 7879 0.2327347 0.0815508 0.0862527 0.9665130 2.060918 0.1472204 0.2111420
flaregas-28 89 89 20.487573 0.9511899 3.778135 15.054468 10 0.9967909 0.8417111 0.2301974 0.5860519 0.5689563 0.1543455 0.2641543 856 0.1543455 0.0488101 0.2197981 0.9754881 2.061423 0.0059502 0.2068157
flaregas-29 216 216 2.195784 0.5445817 2.023634 27.457879 1 0.2482530 0.3764706 0.0101657 0.4024311 0.2137360 0.6720996 0.7063309 48123 0.6720996 0.4554183 0.7373780 0.9853563 2.060792 0.0992025 0.1068142
flaregas-30 116 116 2.523463 0.6037191 2.127191 15.683955 1 0.3663683 0.4474914 0.0217540 0.4277261 0.2617240 0.6249462 0.6705555 15963 0.6249462 0.3962809 0.6934581 0.9887809 2.061219 0.0790432 0.1013193
#show the dominant taxonomic group for each sample
temp <- sample_data(biom.16s.int)
temp$dominant <- tax_table(biom.16s.int)[dominant(biom.16s.int),"Species"]
knitr::kable(as.matrix(temp[,c("Culture_setup","dominant")]))
Culture_setup dominant
flaregas-1 ocean Lentibacter_uncultured_Rhodobacteraceae
flaregas-2 light+clean_gas Flavobacterium_uncultured_bacterium
flaregas-3 light+clean_gas Marivita_Marivita_sp.
flaregas-10 ocean Lentibacter_uncultured_Rhodobacteraceae
flaregas-11 ocean Lentibacter_uncultured_Rhodobacteraceae
flaregas-12 ocean Lentibacter_uncultured_Rhodobacteraceae
flaregas-13 light+clean_gas Phormidiaceae_cyanobacterium
flaregas-14 light+clean_gas Aureispira_marina
flaregas-15 light+clean_gas Aureispira_marina
flaregas-16 light+clean_gas Aureispira_marina
flaregas-17 light+used_gas Phormidiaceae_cyanobacterium
flaregas-18 light+used_gas Aureispira_marina
flaregas-19 light+used_gas Marivita_Marivita_sp.
flaregas-20 light+used_gas Marivita_Marivita_sp.
flaregas-21 dark+clean_gas Methylophaga_thalassica
flaregas-22 dark+clean_gas Methylomicrobium_uncultured_gamma
flaregas-23 dark+clean_gas Methylophaga_thalassica
flaregas-24 dark+clean_gas Methylophaga_thalassica
flaregas-25 ocean Lentibacter_uncultured_Rhodobacteraceae
flaregas-26 ocean Lentibacter_uncultured_Rhodobacteraceae
flaregas-27 dark+clean_gas Methylomicrobium_uncultured_gamma
flaregas-28 dark+clean_gas Saprospira_grandis
flaregas-29 dark+clean_gas Marivita_Marivita_sp.
flaregas-30 dark+clean_gas Marivita_Marivita_sp.
p <- plot_richness(biom.16s.int, "Time_point", "Culture_setup", measures=alpha_meas)
p$data$Time_point <- factor(p$data$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/alpha-diversity-times-16s-emu.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Time_point, y=value, color=NULL), alpha=0.1)
dev.off()
## png 
##   2
#boxplot_alpha(biom.16s.int, index = "chao1", x_var = "Time_point") + theme_minimal() + theme(legend.position = "none")

Sample richness in the different culture setups and different time points (minimap2)

alpha_meas = c("Observed", "Chao1", "Shannon", "Simpson")

p <- plot_richness(biom.16s.m.unfiltered, "Culture_setup", "Time_point", measures=alpha_meas)
pdf(paste0(wdir, "fig/alpha-diversity-cultures-16s-minimap2.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Culture_setup, y=value, color=NULL), alpha=0.1)
dev.off()
## png 
##   2
#boxplot_alpha(biom.16s.m.unfiltered, index = "chao1", x_var = "Culture_setup") + theme_minimal() + theme(legend.position = "none")

#https://microbiome.github.io/tutorials/Alphadiversity.html
tab <-microbiome::alpha(biom.16s.m.unfiltered, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
knitr::kable(as.matrix(tab))
observed chao1 diversity_inverse_simpson diversity_gini_simpson diversity_shannon diversity_fisher diversity_coverage evenness_camargo evenness_pielou evenness_simpson evenness_evar evenness_bulla dominance_dbp dominance_dmn dominance_absolute dominance_relative dominance_simpson dominance_core_abundance dominance_gini rarity_log_modulo_skewness rarity_low_abundance rarity_rare_abundance
flaregas-1 4315 8445.548 15.286749 0.9345839 5.281947 1207.0809 20 0.8620797 0.6310681 0.0035427 0.4566371 0.2752696 0.2456530 0.2742429 10285 0.2456530 0.0654161 0.2467517 0.9617968 2.061421 0.3704261 0.4843795
flaregas-2 1987 4116.146 22.767845 0.9560784 4.924868 541.5041 15 0.9276232 0.6484883 0.0114584 0.4167156 0.2742059 0.1694604 0.2537076 3508 0.1694604 0.0439216 0.0453601 0.9827436 2.061421 0.3003720 0.6296314
flaregas-3 931 2259.231 31.718452 0.9684726 4.992567 314.0958 19 0.8662062 0.7303069 0.0340692 0.4691053 0.3637613 0.1391199 0.2011435 803 0.1391199 0.0315274 0.1476091 0.9875918 2.061421 0.3210326 0.7619543
flaregas-10 2298 5702.450 16.161425 0.9381243 5.209466 781.9143 21 0.9028564 0.6730754 0.0070328 0.5294223 0.3304788 0.2381905 0.2671336 3333 0.2381905 0.0618757 0.2400486 0.9718780 2.061423 0.3801901 0.4884585
flaregas-11 3563 8438.965 63.305943 0.9842037 6.300107 1332.8309 72 0.9212405 0.7703388 0.0177676 0.5320364 0.3890814 0.1070872 0.1415777 1925 0.1070872 0.0157963 0.1231086 0.9461639 2.061423 0.5018914 0.5826658
flaregas-12 6587 12834.554 55.341901 0.9819305 6.349247 1948.3011 68 0.8285402 0.7220918 0.0084017 0.4451133 0.3134241 0.1155695 0.1570329 6394 0.1155695 0.0180695 0.1242635 0.9302238 2.061421 0.4982829 0.5931750
flaregas-13 1349 3023.204 15.658169 0.9361356 3.979428 296.1536 6 0.9714376 0.5521524 0.0116072 0.3485270 0.1775566 0.1786022 0.3019877 4978 0.1786022 0.0638644 0.1423292 0.9935643 2.061400 0.1590126 0.2386625
flaregas-14 2105 4383.004 7.581352 0.8680974 3.590428 403.2499 4 0.8841795 0.4692099 0.0036016 0.3184866 0.1423329 0.3376338 0.4283211 25045 0.3376338 0.1319026 0.1830327 0.9933766 2.061407 0.1515409 0.1941546
flaregas-15 774 1989.101 8.703364 0.8851019 3.715408 218.5239 4 0.9642672 0.5585760 0.0112447 0.4723425 0.2480729 0.3041758 0.3906932 2229 0.3041758 0.1148981 0.2463155 0.9936617 2.061412 0.2083788 0.2194323
flaregas-16 1524 3329.817 7.788971 0.8716133 3.788956 353.0381 4 0.9236048 0.5169748 0.0051109 0.3860155 0.2056010 0.3329758 0.4226070 8693 0.3329758 0.1283867 0.1938944 0.9915259 2.061411 0.2021680 0.2710384
flaregas-17 1745 4175.615 15.314810 0.9347037 3.990306 379.7383 6 0.9748314 0.5345704 0.0087764 0.3631715 0.1668146 0.1670339 0.3014777 6217 0.1670339 0.0652963 0.1302526 0.9923364 2.061407 0.1630306 0.2489790
flaregas-18 1756 3646.043 7.730096 0.8706355 3.864414 384.5041 4 0.8923293 0.5172696 0.0044021 0.3496265 0.1959522 0.3388035 0.4228217 12408 0.3388035 0.1293645 0.1602818 0.9912086 2.061416 0.1974988 0.2674276
flaregas-19 1417 3100.050 11.228064 0.9109375 3.857661 337.9353 4 0.9173522 0.5316295 0.0079238 0.4020619 0.2038845 0.2263304 0.3497255 4989 0.2263304 0.0890625 0.3596607 0.9918641 2.061417 0.2070045 0.2215216
flaregas-20 1362 2793.256 16.655080 0.9399583 4.344194 370.0748 6 0.9047069 0.6019633 0.0122284 0.4394832 0.2576154 0.1592927 0.2830782 2279 0.1592927 0.0600417 0.3619207 0.9889622 2.061418 0.2472216 0.2664430
flaregas-21 2055 3721.157 50.490371 0.9801942 5.217657 529.3173 21 0.8893018 0.6840110 0.0245695 0.3697369 0.2648962 0.0588563 0.1155665 1481 0.0588563 0.0198058 0.0583794 0.9831396 2.061410 0.3097008 0.4206176
flaregas-22 1066 2646.504 15.908887 0.9371421 4.407192 308.5543 7 0.9403829 0.6321574 0.0149239 0.4401330 0.2874822 0.1723409 0.3289279 1630 0.1723409 0.0628579 0.0704166 0.9898860 2.061421 0.2544936 0.3418270
flaregas-23 2050 4228.650 53.969719 0.9814711 5.391625 535.9633 26 0.9035793 0.7070432 0.0263267 0.3618201 0.2839977 0.0826605 0.1485890 1986 0.0826605 0.0185289 0.0701740 0.9815394 2.061410 0.3318072 0.5316324
flaregas-24 2739 4807.189 59.213644 0.9831120 5.514769 649.5860 31 0.8502754 0.6967185 0.0216187 0.3258944 0.2588662 0.0825517 0.1426794 3582 0.0825517 0.0168880 0.0679634 0.9790365 2.061403 0.3179000 0.5762024
flaregas-25 5511 11670.811 72.584285 0.9862229 6.672463 1934.4862 114 0.8167415 0.7745617 0.0131708 0.5002829 0.3782221 0.1042931 0.1304458 3282 0.1042931 0.0137771 0.1259017 0.9215018 2.061423 0.5673202 0.6504496
flaregas-26 5460 10824.639 89.489842 0.9888255 6.720246 1912.8569 117 0.8065763 0.7809513 0.0163901 0.4993797 0.3821001 0.0886553 0.1205073 2775 0.0886553 0.0111745 0.1097409 0.9214100 2.061423 0.5704610 0.6566883
flaregas-27 2216 4300.333 21.730904 0.9539826 4.530733 528.6524 8 0.7515981 0.5881427 0.0098064 0.3909616 0.2174082 0.1296861 0.2391033 4466 0.1296861 0.0460174 0.0645527 0.9863596 2.061412 0.2435752 0.3862996
flaregas-28 1019 2374.803 4.567576 0.7810655 2.909577 236.4565 2 0.9764584 0.4200599 0.0044824 0.4329404 0.1798135 0.3684029 0.6516478 6394 0.3684029 0.2189345 0.0467274 0.9949559 2.061419 0.1496889 0.1669740
flaregas-29 1929 4190.534 8.079105 0.8762239 3.281015 364.7623 3 0.9760829 0.4337238 0.0041882 0.3402545 0.1270341 0.2398770 0.4579773 17239 0.2398770 0.1237761 0.6882531 0.9946194 2.061414 0.1338463 0.1716250
flaregas-30 1140 2177.682 9.508020 0.8948256 3.495112 243.9462 4 0.9840635 0.4965506 0.0083404 0.3623653 0.1691140 0.2259636 0.4202265 5845 0.2259636 0.1051744 0.6286388 0.9949990 2.061408 0.1481811 0.1886574
#show the dominant taxonomic group for each sample
temp <- sample_data(biom.16s.m.unfiltered)
temp$dominant <- tax_table(biom.16s.m.unfiltered)[dominant(biom.16s.m.unfiltered),"Species"]
knitr::kable(as.matrix(temp[,c("Culture_setup","dominant")]))
Culture_setup dominant
flaregas-1 ocean Lentibacter_algarum
flaregas-2 light+clean_gas Flavobacterium_uncultured_bacterium
flaregas-3 light+clean_gas Psychroflexus_uncultured_organism
flaregas-10 ocean Lentibacter_algarum
flaregas-11 ocean Lentibacter_algarum
flaregas-12 ocean Lentibacter_algarum
flaregas-13 light+clean_gas Phormidiaceae_cyanobacterium
flaregas-14 light+clean_gas Aureispira_marina
flaregas-15 light+clean_gas Aureispira_marina
flaregas-16 light+clean_gas Aureispira_marina
flaregas-17 light+used_gas Saprospiraceae_uncultured_uncultured_marine
flaregas-18 light+used_gas Aureispira_marina
flaregas-19 light+used_gas Aureispira_marina
flaregas-20 light+used_gas Aureispira_marina
flaregas-21 dark+clean_gas NS3a_marine_group_uncultured_bacterium
flaregas-22 dark+clean_gas Methylomicrobium_uncultured_bacterium
flaregas-23 dark+clean_gas Methylophaga_thalassica
flaregas-24 dark+clean_gas Methylophaga_thalassica
flaregas-25 ocean Lentibacter_algarum
flaregas-26 ocean Lentibacter_algarum
flaregas-27 dark+clean_gas Methylomicrobium_uncultured_bacterium
flaregas-28 dark+clean_gas Methylomicrobium_uncultured_bacterium
flaregas-29 dark+clean_gas Marivita_Marivita_sp.
flaregas-30 dark+clean_gas Marivita_Marivita_sp.
p <- plot_richness(biom.16s.m.unfiltered, "Time_point", "Culture_setup", measures=alpha_meas)
p$data$Time_point <- factor(p$data$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/alpha-diversity-times-16s-minimap2.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Time_point, y=value, color=NULL), alpha=0.1)
dev.off()
## png 
##   2
#boxplot_alpha(biom.16s.m.unfiltered, index = "chao1", x_var = "Time_point") + theme_minimal() + theme(legend.position = "none")

Beta diversity measure

This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).

PCA (emu)

# apply a singular value decomposition to the dataset
# do not use princomp function in R!!
pcx <- prcomp(t(otu_table(biom.16s.clr)), scale. = T)

# generate a scree plot
#http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/
fviz_eig(pcx, addlabels = TRUE)

# generate biplot (check https://colorbrewer2.org/ for selecting colors)
pdf(paste0(wdir,"fig/clr-pca-16s-emu.pdf"), width = 30, height = 8)
p1 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(1,3), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
p2 <- ggbiplot(pcx, groups = meta$Time_point, var.axes = F, choices = c(1,4), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_manual(values=c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00')) + labs(color = "Group")
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))
## Too few points to calculate an ellipse
dev.off()

PCA (minimap2)

#biplot
pcx <- prcomp(t(otu_table(biom.16s.m.clr)), scale. = T)
fviz_eig(pcx, addlabels = TRUE)

# generate biplot (check https://colorbrewer2.org/ for selecting colors)
p1 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
p2 <- ggbiplot(pcx, groups = meta$Time_point, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_manual(values=c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00')) + labs(color = "Group")
pdf(paste0(wdir,"fig/clr-pca-16s-minimap2.pdf"), width = 30, height = 8)
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))
## Too few points to calculate an ellipse
dev.off()


Hierarchical Clustering

# average-link clustering
# Use unifrac on original compositional data: If your data is truly compositional (relative abundances sum to 1), consider using unifrac directly on the untransformed data
biom.hclust <- hclust(phyloseq::distance(biom.16s.m, method = "unifrac"), method="average")
biom.hclust$labels <- sample_data(biom.16s)$Label
plot(biom.hclust)

#biom.hclust <- hclust(phyloseq::distance(biom.16s, "wunifrac"), method="average")
#biom.hclust$labels <- sample_data(biom.16s)$Label
#plot(biom.hclust)
#biom.hclust <- hclust(phyloseq::distance(biom.16s, "bray"), method="average")
#biom.hclust$labels <- sample_data(biom.16s)$Label
#plot(biom.hclust)

Multivariate community comparison

PERMANOVA

This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).

#https://microbiome.github.io/tutorials/PERMANOVA.html
plot_landscape(biom.16s.m.rel, method = "NMDS", distance = "bray", col = "Culture_setup", size = 3)

plot_landscape(biom.16s.m.rel, method = "NMDS", distance = "bray", col = "Time_point", size = 3)

#https://deneflab.github.io/MicrobeMiseq/demos/mothur_2_phyloseq.html
set.seed(1)

# Calculate bray curtis distance matrix
#biom.16s.dist <- phyloseq::distance(biom.16s.m, method = "bray")
#biom.16s.dist <- phyloseq::distance(biom.16s.m, method = "unifrac")
biom.16s.dist <- phyloseq::distance(biom.16s.m, method = "wunifrac")

# make a data frame from the sample_data
#sampledf <- data.frame(sample_data(biom.16s.m))
sampledf <- meta(biom.16s.m)

#test the hypothesis that the different culture setups have different centroids
# Adonis test (H0 .. different cultures have the same centroid)
perm <- adonis2(biom.16s.dist ~ Culture_setup, data = sampledf)
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = biom.16s.dist ~ Culture_setup, data = sampledf)
##               Df SumOfSqs      R2      F Pr(>F)    
## Culture_setup  3  0.43518 0.44116 5.2629  0.001 ***
## Residual      20  0.55125 0.55884                  
## Total         23  0.98643 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test (H0 .. different cultures have the same dispersion)
beta <- betadisper(biom.16s.dist, sampledf$Culture_setup)
permutest(beta) #alternatively use anova(beta)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     3 0.021279 0.0070930 0.8965    999  0.441
## Residuals 20 0.158234 0.0079117
#test the hypothesis that the different time points have different centroids
# Adonis test (H0 .. different different time points have the same centroid)
perm <- adonis2(biom.16s.dist ~ Time_point, data = sampledf)
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = biom.16s.dist ~ Time_point, data = sampledf)
##            Df SumOfSqs      R2     F Pr(>F)    
## Time_point  4  0.39174 0.39713 3.129  0.001 ***
## Residual   19  0.59468 0.60287                 
## Total      23  0.98643 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test (H0 .. different time points have the same dispersion)
beta <- betadisper(biom.16s.dist, sampledf$Time_point)
permutest(beta)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)  
## Groups     4 0.054529 0.0136322 2.8825    999  0.042 *
## Residuals 19 0.089858 0.0047294                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Univariate community comparisons

ANOVA-like differential expression of genera (minimap2)

set.seed(12345)
# test differential expression: (light.clean_gas+light.used_gas) - dark.clean_gas
meta.sub.1 <- meta %>% dplyr::filter(Culture_setup == "light+clean_gas" | Culture_setup == "light+used_gas" | Culture_setup == "dark+clean_gas") %>% mutate(Culture_setup = replace(Culture_setup, Culture_setup=="light+used_gas", "light+clean_gas"))
mm <- model.matrix(~ Inoculum + Culture_setup, meta.sub.1)
counts.sub <- otu_table(biom.16s.m.genus)[,meta.sub.1$Sample]
x <- aldex.clr(counts.sub, mm, mc.samples=100, verbose=T, gamma=1e-3)
## integer matrix provided
## using all features for denominator
## operating in serial mode
## removed rows with sums equal to zero
## data format is OK
## dirichlet samples complete
## aldex.scaleSim: adjusting samples to reflect scale uncertainty.
## sampling from the default scale model.
glm.test <- aldex.glm(x, mm, fdr.method='BH')
## running tests for each MC instance:
## |------------(25%)----------(50%)----------(75%)----------|
glm.eff<- aldex.glm.effect(x)
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='MW', test='effect', cutoff.effect = 1)

aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='MA', test='effect', cutoff.effect = 1)

#aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='volcano', test='effect', cutoff.effect = 1)

#filter genera that where detected with Emu (filter low abundant genera)
glm.eff.lightVSdark <- glm.eff$`Culture_setuplight+clean_gas`[biom.16s.genus.minimap2emu$SpeciesID,]

# higher relative abundance under light
glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% nrow()
## [1] 12
knitr::kable(as.matrix(tax_table(biom.16s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% row.names(),]))
Kingdom Phylum Class Order Family Genus Species
KY770139.1.1394 Bacteria Proteobacteria Alphaproteobacteria Rhodospirillales Thalassospiraceae Thalassospira NA
EU672857.1.1492 Bacteria Proteobacteria Gammaproteobacteria Cellvibrionales Halieaceae Congregibacter NA
JN428535.1.1439 Bacteria Bacteroidota Bacteroidia Cytophagales Flammeovirgaceae Flexithrix NA
KU578816.1.1368 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae NS2b_marine_group NA
AB245935.1.1459 Bacteria Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Aureispira NA
HM057705.1.1470 Bacteria Cyanobacteria Cyanobacteriia Synechococcales Cyanobiaceae Cyanobium_PCC-6307 NA
KU951800.1.1271 Bacteria Cyanobacteria Cyanobacteriia Synechococcales Synechococcales_Incertae_Sedis Schizothrix_LEGE_07164 NA
HM217045.1.1446 Bacteria Cyanobacteria Cyanobacteriia Oxyphotobacteria_Incertae_Sedis Oxyphotobacteria_Incertae_Sedis_Unknown_Family Leptolyngbya_LEGE-06070 NA
EF654088.1.1482 Bacteria Cyanobacteria Cyanobacteriia Cyanobacteriales Geitlerinemaceae Geitlerinema_PCC-7105 NA
KY379855.1.2514 Bacteria Cyanobacteria Cyanobacteriia Cyanobacteriales Microcystaceae Synechocystis_PCC-6803 NA
KU951847.1.1466 Bacteria Cyanobacteria Cyanobacteriia Cyanobacteriales Cyanobacteriaceae Symphothece_PCC-7002 NA
AB062106.1.1479 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Porphyrobacter NA
# higher relative abundance in the dark
glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% nrow()
## [1] 16
knitr::kable(as.matrix(tax_table(biom.16s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% row.names(),]))
Kingdom Phylum Class Order Family Genus Species
KC247325.1.1400 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Tropicibacter NA
KY437089.1.1426 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Youngimonas NA
AY546508.1.1498 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylomonadaceae_uncultured NA
AY007296.1.1499 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylosarcina NA
AB930603.1.1457 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylomicrobium NA
CP002738.392605.394125 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylomonas NA
KC963966.1.1493 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylobacter NA
ATTE01000001.3195237.3196755 Bacteria Proteobacteria Gammaproteobacteria Thiotrichales Thiotrichaceae Leucothrix NA
AB681780.1.1463 Bacteria Proteobacteria Gammaproteobacteria Nitrosococcales Methylophagaceae Methylophaga NA
KC534174.1.1363 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Aequorivita NA
JQ197367.1.1350 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Aquibacter NA
LN875339.1.1446 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Flavobacteriaceae NA
HQ144198.1.1471 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Mariniflexile NA
FJ360684.1.1470 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Meridianimaribacter NA
KJ475509.1.1443 Bacteria Myxococcota Polyangia Nannocystales Nannocystaceae Nannocystaceae_uncultured NA
AB607876.1.1417 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Antarctobacter NA
temp.1 <- glm.eff.lightVSdark[abs(glm.eff.lightVSdark$effect) >= 1.5,] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.16s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-light_clean_usedVSdark-scatter-genus-16s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex, aes(x=Genus, y=diff.btw, color=Phylum)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + ylab("logFC")
dev.off()
## png 
##   2
for(i in glm.eff.lightVSdark[abs(glm.eff.lightVSdark$effect) >= 1,] %>% row.names()) {
  de.genus <- tax_table(biom.16s.m.genus)[i,"Genus"]
  p <- boxplot_abundance(biom.16s.m.genus, x = "Culture_setup", y = i) + scale_y_log10() + ggtitle(de.genus)
  print(p)
}

set.seed(12345)
# test differential expression: ((light.clean_gas+light.used_gas) - ocean) AND (dark.clean_gas - ocean)
meta.sub.1 <- meta %>% mutate(Culture_setup = replace(Culture_setup, Culture_setup=="light+used_gas", "light+clean_gas"))
meta.sub.1$Culture_setup <- factor(meta.sub.1$Culture_setup, levels = c("ocean", "dark+clean_gas", "light+clean_gas"))
mm <- model.matrix(~0 + Inoculum + Culture_setup, meta.sub.1)
counts.sub <- otu_table(biom.16s.m.genus)[,meta.sub.1$Sample]
x <- aldex.clr(counts.sub, mm, mc.samples=100, verbose=T, gamma=1e-3)
## integer matrix provided
## using all features for denominator
## operating in serial mode
## removed rows with sums equal to zero
## data format is OK
## dirichlet samples complete
## aldex.scaleSim: adjusting samples to reflect scale uncertainty.
## sampling from the default scale model.
glm.test.1 <- aldex.glm(x, mm, fdr.method='BH')
## running tests for each MC instance:
## |------------(25%)----------(50%)----------(75%)----------|
glm.eff.1<- aldex.glm.effect(x)
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='MW', test='effect', cutoff.effect = 1.5)

aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='MA', test='effect', cutoff.effect = 1.5)

#aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='volcano', test='fdr', cutoff.pval = 0.1)
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='MW', test='effect', cutoff.effect = 1.5)

aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='MA', test='effect', cutoff.effect = 1.5)

#aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='volcano', test='fdr', cutoff.pval = 0.1)

#filter genera that where detected with Emu (filter low abundant genera)
glm.eff.lightVSocean <- glm.eff.1$`Culture_setuplight+clean_gas`[biom.16s.genus.minimap2emu$SpeciesID,]
glm.eff.darkVSocean <- glm.eff.1$`Culture_setupdark+clean_gas`[biom.16s.genus.minimap2emu$SpeciesID,]

# higher relative abundance in light-incubated cultures than in the ocean
glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% nrow()
## [1] 9
knitr::kable(as.matrix(tax_table(biom.16s.m.genus)[glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% row.names(),]))
Kingdom Phylum Class Order Family Genus Species
EU979477.1.1429 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Pseudorhodobacter NA
KY770139.1.1394 Bacteria Proteobacteria Alphaproteobacteria Rhodospirillales Thalassospiraceae Thalassospira NA
AB245935.1.1459 Bacteria Bacteroidota Bacteroidia Chitinophagales Saprospiraceae Aureispira NA
KU951800.1.1271 Bacteria Cyanobacteria Cyanobacteriia Synechococcales Synechococcales_Incertae_Sedis Schizothrix_LEGE_07164 NA
HM217045.1.1446 Bacteria Cyanobacteria Cyanobacteriia Oxyphotobacteria_Incertae_Sedis Oxyphotobacteria_Incertae_Sedis_Unknown_Family Leptolyngbya_LEGE-06070 NA
EF654088.1.1482 Bacteria Cyanobacteria Cyanobacteriia Cyanobacteriales Geitlerinemaceae Geitlerinema_PCC-7105 NA
KY379855.1.2514 Bacteria Cyanobacteria Cyanobacteriia Cyanobacteriales Microcystaceae Synechocystis_PCC-6803 NA
KU951847.1.1466 Bacteria Cyanobacteria Cyanobacteriia Cyanobacteriales Cyanobacteriaceae Symphothece_PCC-7002 NA
AB062106.1.1479 Bacteria Proteobacteria Alphaproteobacteria Sphingomonadales Sphingomonadaceae Porphyrobacter NA
# higher relative abundance in dark-incubated cultures than in the ocean
glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5,] %>% nrow()
## [1] 18
knitr::kable(as.matrix(tax_table(biom.16s.m.genus)[glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5,] %>% row.names(),]))
Kingdom Phylum Class Order Family Genus Species
KC247325.1.1400 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Tropicibacter NA
KY437089.1.1426 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Youngimonas NA
AY546508.1.1498 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylomonadaceae_uncultured NA
AY007296.1.1499 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylosarcina NA
AB930603.1.1457 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylomicrobium NA
CP002738.392605.394125 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylomonas NA
KC963966.1.1493 Bacteria Methanotroph Gammaproteobacteria Methylococcales Methylomonadaceae Methylobacter NA
AB053128.1.1530 Bacteria Proteobacteria Gammaproteobacteria Oceanospirillales Alcanivoracaceae1 Alcanivorax NA
ATTE01000001.3195237.3196755 Bacteria Proteobacteria Gammaproteobacteria Thiotrichales Thiotrichaceae Leucothrix NA
AB681780.1.1463 Bacteria Proteobacteria Gammaproteobacteria Nitrosococcales Methylophagaceae Methylophaga NA
KC534174.1.1363 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Aequorivita NA
AB681705.1.1449 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Mesoflavibacter NA
LN875339.1.1446 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Flavobacteriaceae NA
HQ144198.1.1471 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Mariniflexile NA
FJ360684.1.1470 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Meridianimaribacter NA
KJ475509.1.1443 Bacteria Myxococcota Polyangia Nannocystales Nannocystaceae Nannocystaceae_uncultured NA
AB607876.1.1417 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Antarctobacter NA
FJ232451.1.1429 Bacteria Proteobacteria Alphaproteobacteria Rhodobacterales Rhodobacteraceae Sedimentitalea NA
temp.1 <- glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.16s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex.lightVSocean <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-lightVSocean-scatter-genus-16s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex.lightVSocean, aes(x=reorder(Genus, diff.btw), y=diff.btw, color=Phylum)) +
    geom_point(size=3, width = 0.2) + coord_flip() +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + labs(y = "logFC", x = "Genus")
dev.off()
## png 
##   2
temp.1 <- glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5, ] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.16s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex.darkVSocean <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-darkVSocean-scatter-genus-16s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex.darkVSocean, aes(x=reorder(Genus, diff.btw), y=diff.btw, color=Phylum)) +
    geom_point(size=3, width = 0.2) + coord_flip() +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + labs(y = "logFC", x = "Genus")
dev.off()
## png 
##   2

Differential abundance testing of genera with DESeq2 (minimap2)

As DESeq2 expects raw counts we will use here the minimap2 counts instead of emu counts.

#run DESeq2
#https://micca.readthedocs.io/en/latest/phyloseq.html
sample_data(biom.16s.m.genus)$Culture_setup <- as.factor(sample_data(biom.16s.m.genus)$Culture_setup)
ds <- phyloseq_to_deseq2(biom.16s.m.genus, ~ Inoculum + Culture_setup)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
ds <- DESeq(ds)
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## fitting model and testing
alpha <- 0.001

# test differential expression: light.clean_gas - dark.clean_gas
res.1 <- results(ds, contrast=c("Culture_setup", "light+clean_gas", "dark+clean_gas"), alpha=alpha)
summary(res.1)
## 
## out of 1115 with nonzero total read count
## adjusted p-value < 0.001
## LFC > 0 (up)       : 35, 3.1%
## LFC < 0 (down)     : 19, 1.7%
## outliers [1]       : 15, 1.3%
## low counts [2]     : 713, 64%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res.1 <- res.1[order(res.1$padj, na.last=NA), ]
res_sig <- res.1[(res.1$padj < alpha), ]
res_sig
## log2 fold change (MLE): Culture_setup light+clean_gas vs dark+clean_gas 
## Wald test p-value: Culture setup light.clean gas vs dark.clean gas 
## DataFrame with 54 rows and 6 columns
##                         baseMean log2FoldChange     lfcSE      stat      pvalue
##                        <numeric>      <numeric> <numeric> <numeric>   <numeric>
## HM057705.1.1470         368.2061        6.96042  0.666272  10.44683 1.51512e-25
## KC247325.1.1400          59.2219       -4.20854  0.513847  -8.19025 2.60681e-16
## KY770139.1.1394         353.2085        7.01789  0.894119   7.84895 4.19530e-15
## EF654088.1.1482        2180.9272        9.17469  1.225435   7.48688 7.05283e-14
## KJ728848.1.1458          20.0779        6.89193  0.981590   7.02119 2.19986e-12
## ...                          ...            ...       ...       ...         ...
## FJ232451.1.1429          4.59764       -2.83589  0.709342  -3.99792 6.39012e-05
## KF733610.1.1535         21.18703        4.95466  1.250006   3.96371 7.37936e-05
## AB053128.1.1530         21.91235       -3.30114  0.857355  -3.85038 1.17936e-04
## AUHJ01000047.4103.5616   3.85097        4.19135  1.095814   3.82487 1.30839e-04
## AB238790.1.1469          8.87191        3.48769  0.915463   3.80976 1.39103e-04
##                               padj
##                          <numeric>
## HM057705.1.1470        5.86352e-23
## KC247325.1.1400        5.04418e-14
## KY770139.1.1394        5.41194e-13
## EF654088.1.1482        6.82362e-12
## KJ728848.1.1458        1.70269e-10
## ...                            ...
## FJ232451.1.1429        0.000494596
## KF733610.1.1535        0.000559963
## AB053128.1.1530        0.000877715
## AUHJ01000047.4103.5616 0.000955372
## AB238790.1.1469        0.000996903
DESeq2::plotMA(res.1, alpha = alpha, ylim = c(-9,9))

res.1 %>% as.data.frame %>% filter(padj<alpha) %>% summarize(up = sum(log2FoldChange>1), down = sum(log2FoldChange<1))
##   up down
## 1 35   19
# up down
#1 35   19

res_sig <- cbind(as(res_sig, "data.frame"), as(tax_table(biom.16s.m)[rownames(res_sig), ], "matrix"))
pdf(paste0(wdir, "fig/deseq2-light_cleanVSdark_clean-scatter-genus-16s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=log2FoldChange, color=Phylum)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2
#View(cbind(as(res_sig, "data.frame"), as(otu_table(biom.16s.m)[rownames(res_sig), ], "matrix")))

#volcano plot
volDat <- as.data.frame(res.1)
volDat <- volDat[!is.na(volDat$padj), ]
volDat$DGE <- volDat$padj < alpha
pdf(paste0(wdir, "fig/deseq2-light_cleanVSdark_clean-volcano-genus-16s.pdf"), width = 15, height = 13)
ggplot(volDat, aes(x = log2FoldChange, y = -log10(padj), color = DGE)) + 
  geom_point(alpha = 0.50) + 
  scale_color_manual(values=c("TRUE" = "red", "FALSE" = "gray"))
dev.off
## function (which = dev.cur()) 
## {
##     if (which == 1) 
##         stop("cannot shut down device 1 (the null device)")
##     .External(C_devoff, as.integer(which))
##     dev.cur()
## }
## <bytecode: 0x5625e9ecc750>
## <environment: namespace:grDevices>


#run DESeq2
# test differential expression: light.clean_gas - light.used_gas
res.2 <- results(ds, contrast=c("Culture_setup", "light+clean_gas", "light+used_gas"), alpha=alpha)
summary(res.2)
## 
## out of 1115 with nonzero total read count
## adjusted p-value < 0.001
## LFC > 0 (up)       : 0, 0%
## LFC < 0 (down)     : 1, 0.09%
## outliers [1]       : 15, 1.3%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res.2 <- res.2[order(res.2$padj, na.last=NA), ]
DESeq2::plotMA(res.2, alpha = alpha, ylim = c(-9,9))

res.2 %>% as.data.frame %>% filter(padj<alpha) %>% summarize(up = sum(log2FoldChange>1), down = sum(log2FoldChange<1))
##   up down
## 1  0    1
#  up down
#1  0    1
res_sig <- res.2[(res.2$padj < alpha), ]
res_sig <- cbind(as(res_sig, "data.frame"), as(tax_table(biom.16s.m.genus)[rownames(res_sig), ], "matrix"))
knitr::kable(as.matrix(res_sig %>% as.data.frame %>% filter(padj < alpha & log2FoldChange > 1) %>% dplyr::select(Genus, log2FoldChange, padj)))
Genus log2FoldChange padj
knitr::kable(as.matrix(res_sig %>% as.data.frame %>% filter(padj < alpha & log2FoldChange < 1) %>% dplyr::select(Genus, log2FoldChange, padj)))
Genus log2FoldChange padj
JF748732.1.1472 Neptunomonas -3.065593 0.0007264315

Differential abundance testing of genera with edgeR (minimap2)

As edgeR expects raw counts we will use here the minimap2 counts instead of emu counts.

#get genus counts
biom.16s.m.genus <- tax_glom(biom.16s.m, taxrank = "Genus")
er <- phyloseq2edgeR(biom.16s.m.genus)
#normalize
er <- edgeR::calcNormFactors(er, method = "TMM")
#metadata
meta.sub <- as.data.frame(sample_data(biom.16s.m.genus)[,c(2,3,8)])
meta.sub <- meta.sub %>% as_tibble() %>% mutate(across(.cols = 3, .fns = as.integer)) %>% mutate(across(.cols = 1:2, .fns = make.names))
#estimate dispersion
design <- model.matrix(~0 + Culture_setup + Inoculum, data=meta.sub)
er <- edgeR::estimateDisp(er, design)
plotBCV(er)

#differential expression
fit <- glmQLFit(er, design, robust = TRUE)
plotQLDisp(fit)

# test differential expression: light.clean_gas - dark.clean_gas
qlf <- glmQLFTest(fit, contrast = c(-1,1,0,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
##        -1*Culture_setupdark.clean_gas 1*Culture_setuplight.clean_gas
## Down                                                              11
## NotSig                                                          1089
## Up                                                                15
#       -1*Culture_setupdark.clean_gas 1*Culture_setuplight.clean_gas
#Down                                                              34
#NotSig                                                          1040
#Up                                                                41
dge.1 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.1 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 15
knitr::kable(as.matrix(dge.1 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
HM057705.1.1470 Cyanobium_PCC-6307 6.170126 1.711499e-06
KY770139.1.1394 Thalassospira 6.969588 7.424660e-05
KJ728848.1.1458 Wenyingzhuangia 6.292215 8.027002e-05
CP011456.3050785.3052272 Aphanizomenon_NIES81 6.556994 8.027002e-05
EF654088.1.1482 Geitlerinema_PCC-7105 8.513339 8.027002e-05
KU951800.1.1271 Schizothrix_LEGE_07164 5.895140 9.999445e-05
JN428535.1.1439 Flexithrix 8.106133 1.046041e-04
KY379855.1.2514 Synechocystis_PCC-6803 7.440222 1.130669e-04
KY807915.1.1369 Oxyphotobacteria_Incertae_Sedis_Unknown_Family_Unknown_Family 8.295968 1.815318e-04
HM217045.1.1446 Leptolyngbya_LEGE-06070 6.937264 3.386363e-04
EU672857.1.1492 Congregibacter 4.725332 3.626087e-04
KU951847.1.1466 Symphothece_PCC-7002 7.948037 3.922224e-04
HQ674537.1.1454 Holosporaceae_uncultured 6.045458 6.340628e-04
AB062106.1.1479 Porphyrobacter 4.993965 6.700702e-04
GQ441334.1.1453 Spirosomaceae_uncultured 4.724273 7.052793e-04
dge.1 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 11
knitr::kable(as.matrix(dge.1 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
CP018572.2205903.2207370 Marivivens -7.271044 2.249065e-05
KC247325.1.1400 Tropicibacter -4.721854 2.249065e-05
EF471465.1.1394 Litorimicrobium -6.666238 8.027002e-05
KC963966.1.1493 Methylobacter -7.669797 8.027002e-05
FJ869046.1.1459 Aestuariicoccus -6.639487 8.027002e-05
ATTE01000001.3195237.3196755 Leucothrix -5.217210 8.027002e-05
DQ234217.1.1519 HIMB11 -6.301259 1.167209e-04
CP022207.429135.430671 Francisella -7.170323 1.656684e-04
AB550558.1.1436 Primorskyibacter -4.244944 3.050166e-04
AB681705.1.1449 Mesoflavibacter -5.602549 7.052793e-04
JX844498.1.1439 Subsaxibacter -4.737066 7.052793e-04
res_sig <- dge.1 %>% filter(FDR<0.001)
pdf(paste0(wdir, "fig/edger-light_cleanVSdark_clean-scatter-genus-16s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=logFC, color=Phylum)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2
# test differential expression: (light.clean_gas + light.used_gas) - dark.clean_gas
qlf <- glmQLFTest(fit, contrast = c(-1,1/2,1/2,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
##        -1*Culture_setupdark.clean_gas 0.5*Culture_setuplight.clean_gas 0.5*Culture_setuplight.used_gas
## Down                                                                                                38
## NotSig                                                                                            1053
## Up                                                                                                  24
#       -1*Culture_setupdark.clean_gas 0.5*Culture_setuplight.clean_gas 0.5*Culture_setuplight.used_gas
#Down                                                                                                64
#NotSig                                                                                             997
#Up                                                                                                  54
dge.2 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.2 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 24
knitr::kable(as.matrix(dge.2 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
HM057705.1.1470 Cyanobium_PCC-6307 5.258539 2.949909e-06
KY770139.1.1394 Thalassospira 7.064298 1.402626e-05
KJ728848.1.1458 Wenyingzhuangia 6.417309 1.445156e-05
CP011456.3050785.3052272 Aphanizomenon_NIES81 6.595324 1.838711e-05
KU951800.1.1271 Schizothrix_LEGE_07164 5.881805 2.783457e-05
EF654088.1.1482 Geitlerinema_PCC-7105 8.626385 2.877699e-05
KY807915.1.1369 Oxyphotobacteria_Incertae_Sedis_Unknown_Family_Unknown_Family 8.380365 6.811987e-05
HM217045.1.1446 Leptolyngbya_LEGE-06070 7.071307 1.166464e-04
KU951847.1.1466 Symphothece_PCC-7002 8.111378 1.429601e-04
KU578816.1.1368 NS2b_marine_group 4.970993 1.590979e-04
GU230385.1.1498 Peredibacter 4.566924 2.417101e-04
AB062106.1.1479 Porphyrobacter 4.908613 2.492908e-04
JN428535.1.1439 Flexithrix 6.724950 2.614798e-04
KY379855.1.2514 Synechocystis_PCC-6803 6.255655 2.903490e-04
LC275989.1.1406 Limnothrix 7.158101 3.835265e-04
EU672857.1.1492 Congregibacter 4.295788 3.903955e-04
DQ015830.1.1494 Oleiphilus 3.914621 3.928785e-04
KM587636.1.1505 Cyclobacteriaceae 4.888703 6.036249e-04
KM019986.1.1462 Pleurocapsa_PCC-7319 7.026484 6.969247e-04
ALVO01000007.393816.395283 Geminocystis_PCC-6308 5.807228 7.004955e-04
KF733610.1.1535 Gimesia 4.068907 8.445897e-04
KM820575.1.1347 Paraglaciecola 3.324822 8.751609e-04
AB245935.1.1459 Aureispira 7.677378 8.751609e-04
AM279196.8.1489 NS11-12_marine_group 3.984948 8.845655e-04
dge.2 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 38
knitr::kable(as.matrix(dge.2 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
KC963966.1.1493 Methylobacter -7.662905 2.635192e-07
KC247325.1.1400 Tropicibacter -4.705569 2.635192e-07
CP022207.429135.430671 Francisella -8.211281 1.058575e-06
EF471465.1.1394 Litorimicrobium -6.611537 1.434257e-06
ATTE01000001.3195237.3196755 Leucothrix -5.029577 1.984477e-06
CP018572.2205903.2207370 Marivivens -6.066694 1.984477e-06
AB681705.1.1449 Mesoflavibacter -5.956536 5.265970e-06
AB930603.1.1457 Methylomicrobium -7.344289 1.092613e-05
KC534174.1.1363 Aequorivita -5.631045 1.387040e-05
FJ869046.1.1459 Aestuariicoccus -5.424786 1.445156e-05
AB500095.1.1519 Marinicella -4.125019 1.445156e-05
AY546508.1.1498 Methylomonadaceae_uncultured -7.765463 1.445156e-05
KY437089.1.1426 Youngimonas -5.755412 1.445156e-05
JX844498.1.1439 Subsaxibacter -4.858413 1.668599e-05
FJ264570.1.1500 pItb-vmat-59 -6.536707 4.851993e-05
DQ234217.1.1519 HIMB11 -4.806107 4.851993e-05
FMJP01000825.542.2072 Methyloprofundus -5.165890 4.851993e-05
AY007296.1.1499 Methylosarcina -7.028768 4.851993e-05
CP015231.476529.477997 Ruegeria -3.581767 4.851993e-05
AB550558.1.1436 Primorskyibacter -3.659586 6.277639e-05
KF465276.1.1344 Methylomarinum -5.917652 9.508996e-05
AB453959.1.1457 Marine_Methylotrophic_Group_2 -6.184360 9.692300e-05
KC290421.1.1525 Crenothrix -6.180702 1.187234e-04
LN875339.1.1446 Flavobacteriaceae -3.820729 1.252776e-04
AY661691.1.1473 Tenacibaculum -3.839027 1.252776e-04
KF437680.1.1446 Vitellibacter -4.426200 1.320965e-04
FJ712568.1.1394 Milano-WF1B-03 -5.738365 1.590979e-04
AB607876.1.1417 Antarctobacter -3.455238 2.375354e-04
AB053128.1.1530 Alcanivorax -3.793586 2.375354e-04
MKSF01000014.384003.385521 Taibaiella -5.627458 2.479392e-04
CP019307.2493152.2494620 Phaeobacter -3.596651 2.479392e-04
GU451533.1.1243 Schleiferia -4.788819 2.479392e-04
EU287117.1.1428 Albimonas -3.411079 4.003033e-04
JQ197367.1.1350 Aquibacter -2.789719 4.676467e-04
HQ144198.1.1471 Mariniflexile -3.665439 5.352124e-04
FJ436725.1.1370 Celeribacter -2.747662 7.774477e-04
JX412960.1.1486 Flavirhabdus -4.582240 8.335923e-04
JX391335.1.1502 Methylophagaceae_uncultured -4.924455 9.761934e-04
pdf(paste0(wdir, "fig/edger-light_clean_usedVSdark_clean-scatter-genus-16s.pdf"), width = 15, height = 13)
ggplot(dge.2, aes(x=Genus, y=logFC, color=Phylum)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2
# test differential expression: light.clean_gas - light.used_gas
qlf <- glmQLFTest(fit, contrast = c(0,1,-1,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
##        1*Culture_setuplight.clean_gas -1*Culture_setuplight.used_gas
## Down                                                               1
## NotSig                                                          1114
## Up                                                                 0
#       1*Culture_setuplight.clean_gas -1*Culture_setuplight.used_gas
#Down                                                               4
#NotSig                                                          1098
#Up                                                                 0
dge.3 <- topTags(qlf, n = Inf)$table
dge.3 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 0
knitr::kable(as.matrix(dge.3 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
dge.3 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 1
knitr::kable(as.matrix(dge.3 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
AM279196.8.1489 NS11-12_marine_group -5.153853 0.0009801917
res_sig <- dge.3 %>% filter(FDR<0.001)
pdf(paste0(wdir, "fig/edger-light_cleanVSlight_used-scatter-genus.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=logFC, color=Phylum)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2
# test differential expression: ocean - (light.clean_gas + light.used_gas + dark.clean_gas)/3
qlf <- glmQLFTest(fit, contrast = c(-1/3,-1/3,-1/3,1,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
##        -0.333333333333333*Culture_setupdark.clean_gas -0.333333333333333*Culture_setuplight.clean_gas -0.333333333333333*Culture_setuplight.used_gas 1*Culture_setupocean
## Down                                                                                                                                                                   23
## NotSig                                                                                                                                                                934
## Up                                                                                                                                                                    158
#       -0.333333333333333*Culture_setupdark.clean_gas -0.333333333333333*Culture_setuplight.clean_gas -0.333333333333333*Culture_setuplight.used_gas 1*Culture_setupocean
#Down                                                               45
#NotSig                                                            828
#Up                                                                242
dge.4 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.4 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 158
knitr::kable(as.matrix(dge.4 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
AY548995.1.1382 Lentimicrobiaceae 4.299767 3.128146e-10
EU799083.1.1494 NS5_marine_group 4.588771 6.742205e-10
KF741413.1.1479 Bacteroidetes_VC2.1_Bac22 4.702386 6.742205e-10
KT266595.1.1504 Desulforhopalus 4.164808 1.796639e-09
KC425506.1.1449 SAR116_clade 4.700587 1.796639e-09
DQ234096.1.1547 Arcobacteraceae_uncultured 4.398214 2.280214e-09
CP003984.2903910.2905362 Planktomarina 4.363532 2.280214e-09
JX391370.1.1507 Thiomicrorhabdus 4.352153 2.280214e-09
JN977181.1.1499 Sulfurovum 4.311595 4.011571e-09
DQ351806.1.1480 Bacteroidetes_BD2-2 4.161159 4.844382e-09
EU432455.1.1361 Marinifilaceae_uncultured 4.431837 4.890420e-09
KX172972.1.1444 Draconibacterium 3.926086 1.741132e-08
AM279194.8.1485 Crocinitomicaceae_uncultured 3.959948 1.741132e-08
CP000084.511358.512831 Clade_Ia 4.471599 1.741132e-08
DQ234254.1.1547 Pseudarcobacter 4.296199 1.997420e-08
KX177096.1.1491 Lachnospiraceae_uncultured 4.064750 1.997420e-08
JN107026.1.1478 Fermentibacteraceae 5.468135 1.997420e-08
DQ677856.1.1392 Prolixibacteraceae 4.428966 1.997420e-08
KT266596.1.1481 Marinifilum 4.209685 2.499360e-08
FJ545595.1.1521 NS4_marine_group 4.007211 4.274081e-08
JX391804.1.1475 Sulfurimonas 3.964505 4.274081e-08
AACY020462030.661.2167 Candidatus_Actinomarina 4.015651 4.274081e-08
HM057617.1.1504 SAR86_clade 3.997552 4.274081e-08
FR733673.1.1549 Desulfobacter 3.960248 4.274081e-08
KF268736.1.1370 Roseimarinus 4.784350 4.355030e-08
KC492879.1.1435 MSBL3 4.466855 5.493685e-08
EF632644.1.1398 Hungateiclostridiaceae_uncultured 4.693646 5.904163e-08
KF650753.1.1478 Halarcobacter 4.147976 6.730820e-08
FR684261.1.1500 Pseudohongiella 4.120351 6.879621e-08
JF344182.1.1488 Marinilabiliaceae_uncultured 3.733532 7.015661e-08
KR825117.1.1534 R76-B128 3.964217 8.377625e-08
FJ672721.1.1394 Alkaliflexus 4.446077 1.153754e-07
AY177803.1.1522 Desulfoconvexum 4.175328 1.153754e-07
GQ274297.1.1512 Ichthyenterobacterium 3.333161 1.153754e-07
HQ008573.1.1509 hgcI_clade 4.161558 1.198956e-07
KX097755.1.1517 MSBL8 3.982331 1.359139e-07
HM234096.1.1443 Lutibacter 3.703584 1.654225e-07
FJ202580.1.1470 Defluviitaleaceae_UCG-011 4.014297 1.849523e-07
JQ580521.1.1521 Desulfosarcinaceae_uncultured 4.012735 2.083910e-07
JF344429.1.1502 Spirochaeta_2 4.044421 2.113200e-07
AACY020059431.1419.2884 Clade_III 3.942214 2.116204e-07
KC582602.1.1415 Fluviicola 3.105267 2.129226e-07
FLOH01001865.2173.3719 Desulfatiglans 3.876941 2.582172e-07
JF344214.1.1499 Sphaerochaeta 3.948439 3.244416e-07
FJ436732.1.1375 Lentibacter 4.034904 3.597343e-07
HM057738.1.1543 OM43_clade 4.014748 3.826552e-07
GQ356973.1.1496 livecontrolB21 3.813504 4.843966e-07
GQ246314.1.1499 Woeseia 4.527468 5.946380e-07
KF799284.1.1451 Synechococcus_CC9902 3.298238 6.611743e-07
FM204974.1.1427 Dysgonomonadaceae_uncultured 4.257319 8.567944e-07
JX391453.1.1506 Desulfocapsaceae_uncultured 4.039990 9.585658e-07
AACY020291711.299.1802 Marinobacterium 3.882655 1.051261e-06
KX088587.1.1518 Sediminispirochaeta 4.178136 1.765495e-06
JX016104.1.1480 Flavicella 5.492827 2.218415e-06
HQ203800.1.1480 Arcobacteraceae 4.094904 2.226069e-06
AACY020468607.261.1725 Clade_II 3.872826 2.630271e-06
EU358688.1.1512 Treponema 3.751128 3.550629e-06
FR684473.1.1498 RS62_marine_group 4.057283 3.633471e-06
EU795171.11703.13180 Thiotrichaceae_uncultured 3.665790 4.549235e-06
CP017259.618357.619876 Formosa 3.161721 4.649038e-06
EU239499.1.1482 Lutimonas 3.031947 6.045904e-06
EU795239.16117.17616 Ulvibacter 2.908831 6.983980e-06
JX403044.1.1509 Arcobacter 4.568130 6.983980e-06
AB546078.1.1345 Anaerovorax 4.685314 6.983980e-06
JX391399.1.1481 Saccharicrinis 3.974172 9.504767e-06
EF061973.1.1483 Labilibacter 3.896357 1.260831e-05
FPLK01002149.16.1518 CL500-29_marine_group 3.969527 1.421805e-05
AB630524.1.1448 Bacteroidetes_vadinHA17 4.053649 1.819234e-05
JQ925053.1.1549 Candidatus_Omnitrophus 4.745160 1.819234e-05
GQ249615.1.1406 Latescibacterota 3.734224 1.973933e-05
EF044234.1.1421 Roseibacterium 2.309185 2.074793e-05
AY771944.1.1521 Desulfosarcina 4.185723 2.074793e-05
EU864446.1.1617 [Eubacterium]_coprostanoligenes_group 3.813844 2.198103e-05
HQ405605.1.1522 SG8-4 4.557494 2.209915e-05
EU642572.1.1399 Thiothrix 3.593740 2.224185e-05
JX225893.1.1480 WCHB1-32 4.058192 2.271422e-05
DQ811934.1.1510 Calditrichaceae 4.947964 3.020438e-05
HE652874.1.1297 Lentimicrobium 4.029582 3.036462e-05
KX097528.1.1498 Caldithrix 3.981808 3.156894e-05
GQ356959.1.1520 Sva0485 4.461833 3.263169e-05
EU287304.1.1489 Eudoraea 3.744319 3.360131e-05
AY344418.1.1493 NS9_marine_group 2.779503 3.369507e-05
CP019352.2140807.2142329 Lacinutrix 3.982231 3.650767e-05
LC192989.1.1290 Stappiaceae 4.872698 4.300884e-05
KR824986.1.1515 Aminicenantales 3.586835 4.410536e-05
HM057716.1.1496 SAR324_clade(Marine_group_B) 3.791822 5.296689e-05
AB630756.1.1458 Arenicellaceae_uncultured 4.418655 6.308305e-05
CP021023.969900.971425 L21-RPul-D3 4.613935 6.308305e-05
FJ517016.1.1505 MidBa8 4.946262 7.698632e-05
DQ831540.1.1516 Desulfobacteraceae_uncultured 4.063789 7.698632e-05
FR685242.1.1486 NS3a_marine_group 3.474037 7.698632e-05
JX017163.1.1500 MB11C04_marine_group 4.364665 9.022946e-05
FM242394.1.1450 Planktothricoides_SR001 4.262996 9.022946e-05
HG971004.1.1492 Rikenellaceae_RC9_gut_group 3.474231 9.022946e-05
FM242289.1.1473 Leptotrichiaceae_uncultured 3.631345 9.192667e-05
EU801277.1.1280 SAR11_clade_uncultured_uncultured 3.853660 1.116840e-04
JF514267.1.1484 Christensenellaceae_uncultured 3.639531 1.131463e-04
EU592362.1.1510 Marinimicrobia_(SAR406_clade) 4.201891 1.131463e-04
DQ811912.1.1485 Salinirepens 4.054921 1.141285e-04
JN119244.1.1494 MWH-UniP1_aquatic_group 4.133162 1.141368e-04
KX933250.1.1378 PeM15 2.796338 1.176452e-04
HM231145.1.1450 Ruminococcus 4.843789 1.347174e-04
AY188291.1.1480 Christensenellaceae_R-7_group 4.396753 1.354872e-04
JQ579954.1.1492 IheB3-7 3.343783 1.365589e-04
AY337519.1.1512 Clostridium_sensu_stricto_1 3.275686 1.608877e-04
KR825137.1.1546 JS1 3.954690 1.619184e-04
HQ405627.1.1487 Demequina 4.241660 1.619184e-04
KJ752776.1.1445 Vicingus 2.924077 1.619184e-04
EU719657.1.1388 Dethiosulfovibrio 4.870964 1.619184e-04
AJ431235.1.1477 Lenti-02 3.801318 1.695557e-04
FR683698.1.1427 Octadecabacter 3.049596 1.807085e-04
EU488104.1.1522 BD2-11_terrestrial_group 4.624778 1.810754e-04
FO203503.30555.32102 Desulfobacula 4.229121 1.882647e-04
AB176674.1.1504 Lishizhenia 4.466516 2.003596e-04
FJ825870.1.1488 Cryomorphaceae_uncultured 3.277393 2.020620e-04
LC193132.1.1465 Cocleimonas 3.395675 2.067887e-04
JX391227.1.1480 MAT-CR-H4-C10 4.464705 2.074201e-04
GQ246449.1.1491 4572-13 3.466848 2.105930e-04
HQ132402.1.1485 LCP-89 3.421611 2.367440e-04
HQ326320.1.1506 Rhodanobacter 4.234875 2.392887e-04
KP091115.1.1623 SEEP-SRB1 4.388404 2.447958e-04
KX097751.1.1498 UCG-012 4.593284 2.465312e-04
JQ197367.1.1350 Aquibacter 2.350957 2.555607e-04
HM992535.1.1347 Thermovirga 3.686608 2.555607e-04
AY177796.1.1501 [Desulfobacterium]_catecholicum_group 4.579529 2.818517e-04
FJ664782.1.1404 Paludibacteraceae_uncultured 4.387308 2.818517e-04
JN458997.1.1446 0319-6G20 4.601291 2.877755e-04
KC665950.1.1514 Anaerophaga 4.072531 2.919247e-04
HF922341.1.1384 Dethiosulfatibacter 4.369118 3.109274e-04
JN637820.1.1471 Sulfurospirillum 4.307876 3.236673e-04
GQ383925.1.1483 Algoriella 3.992145 3.313441e-04
GU196238.1.1492 Paludibacter 3.913049 3.313441e-04
JF268397.1.1477 SB-5 4.238587 3.611305e-04
AY548942.1.1471 Anaerolineaceae_uncultured 4.295069 3.715710e-04
LS997911.1.1416 Desulfovibrio 4.560066 3.852181e-04
DQ302106.1.1491 Coraliomargarita 4.388408 4.172618e-04
FN433319.1.1575 Leeuwenhoekiella 4.045886 4.247481e-04
JX391727.1.1479 Prolixibacteraceae_uncultured 3.927565 5.049171e-04
KM255690.1.1435 Thioclava 2.756104 5.092086e-04
KR825155.1.1455 Candidatus_Thiobios 3.110537 5.092086e-04
EU799705.1.1457 PS1_clade 3.029088 5.452319e-04
JX391178.1.1531 Desulfitobacteriales_uncultured_uncultured 3.813120 5.467212e-04
EF574272.1.1491 PB19 2.464247 5.615888e-04
KF741432.1.1496 Clostridia_vadinBB60_group 3.959153 5.790300e-04
AB212893.1.1508 Luteibacter 4.284996 5.907625e-04
EF400726.1.1488 Odoribacter 4.200624 5.940350e-04
EF655677.1.1491 Guggenheimella 3.603800 7.171432e-04
KF799120.1.1498 HOC36 3.739430 7.447251e-04
FJ189532.1.1479 SBR1031 3.979767 7.447251e-04
HQ891651.1.1387 Planktotalea 2.551894 7.817720e-04
JX016885.1.1476 Flavobacteriaceae_uncultured 2.164628 8.136996e-04
GQ850575.1.1521 Subgroup_23 3.624799 8.136996e-04
JN496611.1.1463 LD1-PA32 4.082652 9.179859e-04
JN387332.1.1319 RBG-13-54-9 3.455901 9.179859e-04
GU451382.1.1229 Rubidimonas 3.267613 9.179859e-04
JQ580392.1.1467 Acetobacterium 4.135390 9.707469e-04
FJ960414.1.1392 Weeksellaceae_uncultured 3.665669 9.836180e-04
EU802139.1.1502 OM182_clade 3.419255 9.836180e-04
dge.4 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 23
knitr::kable(as.matrix(dge.4 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
MH197117.1.1432 Nioella -3.534586 9.504767e-06
KJ754648.1.1322 Nautella -5.488589 9.504767e-06
JX426065.1.1385 Taeseokella -4.372977 9.785866e-06
MH396689.1.1476 Cohaesibacter -4.197273 1.156314e-05
AB627076.1.1425 Lutimaribacter -4.900967 2.007194e-05
MK053889.1.1442 Marinovum -4.118054 2.195942e-05
CP002738.392605.394125 Methylomonas -5.006456 2.567470e-05
GU324080.1.1459 Marivita -5.117043 2.677103e-05
EU221274.1.1422 Seohaeicola -3.409820 3.199306e-05
AB062106.1.1479 Porphyrobacter -4.333170 7.904373e-05
CP012040.3233347.3234858 Cyclobacterium -4.944991 9.106964e-05
AB238790.1.1469 Aliiglaciecola -4.188013 1.141368e-04
AJ786600.1.1449 Hoeflea -3.567957 1.204804e-04
EU979477.1.1429 Pseudorhodobacter -3.648229 1.365589e-04
KC160833.1.1501 Marinobacter -3.195230 1.996241e-04
FJ624296.1.1434 Sagittula -2.901668 2.067887e-04
KC120671.1.1353 Sneathiella -3.699667 2.288779e-04
GQ441325.1.1482 Lewinella -4.127695 2.887393e-04
HQ316943.1.1422 Oricola -3.100572 4.714132e-04
JN119141.1.1383 Rhodobacteraceae -2.515310 5.574295e-04
KP137426.1.1396 Denitromonas -3.585768 7.447251e-04
MH463965.1.1530 Mesorhizobium -3.616027 7.845981e-04
JX223093.1.1487 Leucobacter -3.626221 8.136996e-04
res_sig <- dge.4 %>% filter(FDR<0.001)
pdf(paste0(wdir, "fig/edger-oceanVSinoculum-scatter-genus-16s.pdf"), width = 35, height = 13)
ggplot(res_sig, aes(x=Genus, y=logFC, color=Phylum)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2


Linear time course trends under light of genera with edgeR (minimap2)

# time course trend analysis
library(splines)
meta.light <- meta.sub %>% filter(Culture_setup == "light.clean_gas" | Culture_setup == "light.used_gas" | Culture_setup == "ocean") %>% filter(Inoculum != "Hornbaek.shallow")
X <- ns(meta.light$Time_point, df=1)
design.light <- model.matrix(~X + Inoculum, data=meta.light)
biom.16s.m.light <- prune_samples(sample_names(biom.16s.m.genus) %in% c("flaregas-1","flaregas-2","flaregas-3","flaregas-10","flaregas-11","flaregas-12","flaregas-13","flaregas-14","flaregas-15","flaregas-16","flaregas-17","flaregas-18","flaregas-19","flaregas-20"), biom.16s.m.genus)
er <- phyloseq2edgeR(biom.16s.m.light)
er <- edgeR::calcNormFactors(er, method = "TMM")
er <- edgeR::estimateDisp(er, design.light)
plotBCV(er)

sqrt(er$common.dispersion)
## [1] 0.9915841
fit <- glmQLFit(er, design.light, robust = TRUE)
plotQLDisp(fit)

qlf <- glmQLFTest(fit, coef=2)
summary(decideTests(qlf, p.value=0.01))
##           X
## Down     85
## NotSig 1002
## Up       28
#          X
#Down     60
#NotSig 1033
#Up       22

#filter genera that where detected with Emu (filter low abundant genera)
dge.5 <- topTags(qlf, n = Inf, p.value = 1)$table
dge.5 <- dge.5[biom.16s.genus.minimap2emu$SpeciesID,]

# visualize the fitted spline curves for the top four enriched species under cultivation conditions
dge.5.u <- dge.5 %>% filter(FDR<0.01 & logFC > 1)
dge.5.u %>% nrow()
## [1] 21
logCPM.obs <- cpm(er, log=TRUE, prior.count=qlf$prior.count)
logCPM.fit <- cpm(qlf, log=TRUE)
pdf(paste0(wdir, "fig/edger-time-course-light-genus-16s-1.pdf"), width = 15, height = 15)
par(mfrow=c(3,7))
for(i in 1:nrow(dge.5.u)) {
 FlybaseID <- row.names(dge.5.u)[i]
 Symbol <- dge.5.u$Genus[i]
 logCPM.obs.i <- logCPM.obs[FlybaseID,]
 logCPM.fit.i <- logCPM.fit[FlybaseID,]
 plot(meta.light$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
 lines(meta.light$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png 
##   2

Linear time course trends in the dark of genera with edgeR (minimap2)

# time course trend analysis
library(splines)
meta.dark <- meta.sub %>% filter(Culture_setup == "dark.clean_gas" | Culture_setup == "ocean") %>% filter(Inoculum != "Helsingor")
X <- ns(meta.dark$Time_point, df=1)
design.dark <- model.matrix(~X + Inoculum, data=meta.dark)
biom.16s.m.dark <- prune_samples(sample_names(biom.16s.m.genus) %in% c("flaregas-11","flaregas-12","flaregas-21","flaregas-22","flaregas-23","flaregas-24","flaregas-25","flaregas-26","flaregas-27","flaregas-28","flaregas-29","flaregas-30"), biom.16s.m.genus)
er <- phyloseq2edgeR(biom.16s.m.dark)
er <- edgeR::calcNormFactors(er, method = "TMM")
er <- edgeR::estimateDisp(er, design.dark)
plotBCV(er)

sqrt(er$common.dispersion)
## [1] 0.7714087
fit <- glmQLFit(er, design.dark, robust = TRUE)
plotQLDisp(fit)

qlf <- glmQLFTest(fit, coef=2)
summary(decideTests(qlf, p.value=0.01))
##          X
## Down   233
## NotSig 833
## Up      49
#         X
#Down   248
#NotSig 837
#Up      30

#filter genera that where detected with Emu (filter low abundant genera)
dge.6 <- topTags(qlf, n = Inf, p.value = 1)$table
dge.6 <- dge.6[biom.16s.genus.minimap2emu$SpeciesID,]

# visualize the fitted spline curves for the top four enriched species under cultivation conditions
dge.6.u <- dge.6 %>% filter(FDR<0.01 & logFC > 1)
dge.6.u %>% nrow()
## [1] 47
logCPM.obs <- cpm(er, log=TRUE, prior.count=qlf$prior.count)
logCPM.fit <- cpm(qlf, log=TRUE)
pdf(paste0(wdir, "fig/edger-time-course-dark-genus-16s-1.pdf"), width = 15, height = 15)
par(mfrow=c(4,7))
for(i in 1:28) {
 FlybaseID <- row.names(dge.6.u)[i]
 Symbol <- dge.6.u$Genus[i]
 logCPM.obs.i <- logCPM.obs[FlybaseID,]
 logCPM.fit.i <- logCPM.fit[FlybaseID,]
 plot(meta.dark$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
 lines(meta.dark$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png 
##   2
pdf(paste0(wdir, "fig/edger-time-course-dark-genus-16s-2.pdf"), width = 15, height = 15)
par(mfrow=c(3,7))
for(i in 29:nrow(dge.6.u)) {
 FlybaseID <- row.names(dge.6.u)[i]
 Symbol <- dge.6.u$Genus[i]
 logCPM.obs.i <- logCPM.obs[FlybaseID,]
 logCPM.fit.i <- logCPM.fit[FlybaseID,]
 plot(meta.dark$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
 lines(meta.dark$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png 
##   2


Compare results of DESeq2 and edgeR on genus level

alpha <- 0.001
# test differential expression: light.clean_gas - dark.clean_gas
gene_list <- list(
  DESeq2 = res.1 %>% as.data.frame %>% dplyr::filter(padj < alpha & log2FoldChange<1) %>% rownames,
  edgeR = dge.1 %>% dplyr::filter(FDR < alpha & logFC<1) %>% rownames
)
#venn.diagram(x = gene_list, filename = "venn.light-cleanVSdark-clean.down.tiff")
p.a <- venn(gene_list)

gene_list <- list(
  DESeq2 = res.1 %>% as.data.frame %>% dplyr::filter(padj < alpha & log2FoldChange>1) %>% rownames,
  edgeR = dge.1 %>% dplyr::filter(FDR < alpha & logFC>1) %>% rownames
)
p.b <- venn(gene_list)

Compare light_clean_usedVSdark results of ALDEX and edgeR on genus level

alpha <- 0.001
# test differential expression: (light.clean_gas + light.used_gas) - dark.clean_gas
gene_list <- list(
  aldex = glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1,] %>% row.names(), 
  edgeR = dge.2 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.a <- venn(gene_list)

knitr::kable(as.matrix(tax_table(biom.16s.m.genus)[attr(p.a,"intersections")$aldex,]))
gene_list <- list(
  aldex = glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1,] %>% row.names(), 
  edgeR = dge.2 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.a <- venn(gene_list)

knitr::kable(as.matrix(tax_table(biom.16s.m.genus)[attr(p.a,"intersections")$aldex,]))

Compare results of ocean VS inoculum and time course trends in edgeR on genus level

alpha <- 0.01
# test differential expression: ocean - (light.clean_gas + light.used_gas + dark.clean_gas)/3
#light
gene_list <- list(
  edgeR.contrast = dge.4 %>% filter(FDR < alpha & logFC > 1) %>% rownames,
  edgeR.splines = dge.5 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.a <- venn(gene_list)

gene_list <- list(
  edgeR.contrast = dge.4 %>% filter(FDR < alpha & logFC < 1) %>% rownames,
  edgeR.splines = dge.5 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.b <- venn(gene_list)

#dark
gene_list <- list(
  edgeR.contrast = dge.4 %>% filter(FDR < alpha & logFC > 1) %>% rownames,
  edgeR.splines = dge.6 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.a <- venn(gene_list)

gene_list <- list(
  edgeR.contrast = dge.4 %>% filter(FDR < alpha & logFC < 1) %>% rownames,
  edgeR.splines = dge.6 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.b <- venn(gene_list)

Network construction and characterization

Semi-Parametric Rank-based approach for INference in Graphical model (SPRING)

#https://github.com/stefpeschel/NetCoMi
library(NetCoMi)
## Loading required package: SpiecEasi
## 
## Attaching package: 'SpiecEasi'
## The following object is masked from 'package:MASS':
## 
##     fitdistr
## 
net_spring <- netConstruct(biom.16s,
                           filtTax = "highestFreq",
                           filtTaxPar = list(highestFreq = 50),
                           measure = "spring",
                           measurePar = list(nlambda=50, 
                                             rep.num=50,
                                             Rmethod = "approx"),
                           normMethod = "none", 
                           zeroMethod = "none",
                           sparsMethod = "none", 
                           dissFunc = "signed",
                           verbose = 3,
                           seed = 123456)
## Checking input arguments ...
## Done.
## Data filtering ...
## 928 taxa removed.
## 50 taxa and 24 samples remaining.
## 
## Calculate 'spring' associations ... 
## Registered S3 method overwritten by 'dendextend':
##   method     from 
##   rev.hclust vegan
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust vegan
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_spring_unsigned <- netConstruct(data = net_spring$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           thresh = 0.3,
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_spring_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_spring
props_spring <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_spring, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.20010
## Modularity                0.61445
## Positive edge percentage 80.00000
## Edge density              0.06531
## Natural connectivity      0.02556
## Vertex connectivity       1.00000
## Edge connectivity         1.00000
## Average dissimilarity*    0.97871
## Average path length**     3.26313
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##                   
## name:  1 2  3 4  5
##    #: 14 7 10 8 11
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````       
##  193874
##  322860
##  91996 
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##         
## 91996  9
## 164629 5
## 154360 5
## 218070 5
## 17815  5
## 
## Betweenness centrality (normalized):
##               
## 91996  0.53656
## 25715  0.38350
## 88629  0.33163
## 434721 0.30612
## 434720 0.27891
## 
## Closeness centrality (normalized):
##               
## 91996  0.65420
## 322860 0.55059
## 193874 0.54965
## 158075 0.54771
## 154241 0.53375
## 
## Eigenvector centrality (normalized):
##               
## 91996  1.00000
## 322860 0.68751
## 193874 0.65340
## 154360 0.63810
## 164629 0.62672
pdf(paste0(wdir, "fig/network-spring-16s.pdf"), width = 30, height = 15)
p <- plot(props_spring,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.01,
          nodeColor = "cluster",
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          title1 = "Network on species level with SPRING associations",
          showTitle = TRUE,
          cexTitle = 2.3)
legend(0.57, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#009900","red"), 
       bty = "n", horiz = FALSE)
dev.off()

#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
biom.16s.genus.top50 <- prune_taxa(names(sort(taxa_sums(biom.16s.genus), decreasing = TRUE)[1:50]), biom.16s.genus)
net_spring_genus <- netConstruct(biom.16s.genus.top50,
                           #filtTax = "highestFreq",
                           #filtTaxPar = list(highestFreq = 50),
                           taxRank = "Genus",
                           measure = "spring",
                           measurePar = list(nlambda=50, 
                                             rep.num=50,
                                             Rmethod = "approx"),
                           normMethod = "none", 
                           zeroMethod = "none",
                           sparsMethod = "none", 
                           dissFunc = "signed",
                           verbose = 3,
                           seed = 123456)
## Checking input arguments ... Done.
## 50 taxa and 24 samples remaining.
## 
## Calculate 'spring' associations ... 
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring_genus$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_spring_genus_unsigned <- netConstruct(data = net_spring_genus$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           thresh = 0.3,
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_spring_genus_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_spring_genus
props_spring_genus <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           #hubPar = "eigenvector",
                           hubPar = "betweenness",
                           hubQuant = 0.9,
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_spring_genus, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.22046
## Modularity                0.61130
## Positive edge percentage 79.72973
## Edge density              0.06041
## Natural connectivity      0.02528
## Vertex connectivity       1.00000
## Edge connectivity         1.00000
## Average dissimilarity*    0.98042
## Average path length**     2.94850
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##                      
## name:  1  2 3 4 5 6 7
##    #: 12 11 6 4 6 5 6
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````                        
##  Leptolyngbya_LEGE-06070
##  Marinovum              
##  Phaeobacter            
##  Pseudarcobacter        
##  RS62_marine_group      
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##                          
## Marinovum               6
## Leptolyngbya_LEGE-06070 6
## Sulfurovum              5
## Pseudarcobacter         5
## Phaeobacter             5
## 
## Betweenness centrality (normalized):
##                                
## Pseudarcobacter         0.23044
## RS62_marine_group       0.22364
## Leptolyngbya_LEGE-06070 0.21259
## Marinovum               0.21173
## Phaeobacter             0.19728
## 
## Closeness centrality (normalized):
##                                
## Marinovum               0.57124
## Phaeobacter             0.54792
## Pseudarcobacter         0.54395
## RS62_marine_group       0.53124
## Leptolyngbya_LEGE-06070 0.52883
## 
## Eigenvector centrality (normalized):
##                          
## Sulfurovum        1.00000
## RS62_marine_group 0.97937
## NS5_marine_group  0.82058
## Thiomicrorhabdus  0.76638
## Polaribacter      0.72628
pdf(paste0(wdir, "fig/network-spring-genus-16s.pdf"), width = 30, height = 15)
p <- plot(props_spring_genus,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.01,
          nodeColor = "cluster",
          colorVec =  c("#804000", "#56B4E9", "#009960", "#999999", '#ff7f00', "#CC79A7", "#E69F00"),
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 2.4,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          #title1 = "Network on genus level with Pearson correlations",
          showTitle = FALSE,
          #cexTitle = 2.3,
          posCol = "#0072B2",
          negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.4, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
##      75% 
## 0.344691
knitr::kable(as.matrix(sort(colSums(net_spring_genus$normCounts1), decreasing = TRUE)[1:20]))
Marivita 123729.200
Aureispira 80162.459
Lentibacter 51138.237
Geitlerinema_PCC-7105 31473.064
Methylophaga 30384.441
NS3a_marine_group 22247.805
Methylomicrobium 19798.463
Flavobacterium 16975.246
Saprospiraceae_uncultured 16357.548
Lewinella 14769.526
Cryomorphaceae_uncultured 12163.325
Marivivens 9780.107
Planktomarina 9761.932
Thalassospira 8487.833
Lutimaribacter 8360.902
Winogradskyella 8272.679
Cyanobium_PCC-6307 7024.799
Meridianimaribacter 6564.303
Symphothece_PCC-7002 6339.630
Pseudorhodobacter 5181.541

SPRING for time point 11 hours

#https://github.com/stefpeschel/NetCoMi
library(NetCoMi)
biom.16s.genus.top50.11hr <- prune_samples(sample_data(biom.16s.genus.top50) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% filter(Time_point == 11) %>% rownames(), biom.16s.genus.top50)

net_spring_genus.11 <- netConstruct(biom.16s.genus.top50.11hr,
                           #filtTax = "highestFreq",
                           #filtTaxPar = list(highestFreq = 50),
                           taxRank = "Genus",
                           measure = "spring",
                           measurePar = list(nlambda=50, 
                                             rep.num=50,
                                             Rmethod = "approx"),
                           normMethod = "none", 
                           zeroMethod = "none",
                           sparsMethod = "none", 
                           dissFunc = "signed",
                           verbose = 3,
                           seed = 123456)
## Checking input arguments ... Done.
## 50 taxa and 8 samples remaining.
## 
## Calculate 'spring' associations ... 
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring_genus.11$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

temp.nw <- net_spring_genus.11
props_spring_genus.11 <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_spring_genus.11, numbNodes = 5L))
## 
## Component sizes
## ```````````````                   
## size: 17 6 5 4 2  1
##    #:  1 1 1 1 3 12
## ______________________________
## Global network properties
## `````````````````````````
## Largest connected component (LCC):
##                                  
## Relative LCC size         0.34000
## Clustering coefficient    0.30749
## Modularity                0.43878
## Positive edge percentage 90.47619
## Edge density              0.15441
## Natural connectivity      0.07955
## Vertex connectivity       1.00000
## Edge connectivity         1.00000
## Average dissimilarity*    0.95218
## Average path length**     2.48950
## 
## Whole network:
##                                  
## Number of components     19.00000
## Clustering coefficient    0.36825
## Modularity                0.72875
## Positive edge percentage 95.00000
## Edge density              0.03265
## Natural connectivity      0.02355
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##                           
## name:  0 1 2 3 4 5 6 7 8 9
##    #: 12 7 7 6 5 4 2 3 2 2
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````               
##  Phaeobacter   
##  Psychroserpens
##  Tropicibacter 
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Centrality of disconnected components is zero
## ````````````````````````````````````````````````
## Degree (unnormalized):
##                   
## Phaeobacter      5
## Tropicibacter    5
## Leucothrix       4
## Psychroserpens   4
## Rhodobacteraceae 3
## 
## Betweenness centrality (normalized):
##                                     
## Phaeobacter                  0.64167
## Leucothrix                   0.56667
## Methylophaga                 0.32500
## Planktomarina                0.23333
## Flavobacteriaceae_uncultured 0.23333
## 
## Closeness centrality (normalized):
##                                     
## Phaeobacter                  0.81422
## Tropicibacter                0.81414
## Leucothrix                   0.74242
## Psychroserpens               0.67666
## Flavobacteriaceae_uncultured 0.64401
## 
## Eigenvector centrality (normalized):
##                                     
## Phaeobacter                  1.00000
## Tropicibacter                0.99982
## Psychroserpens               0.67955
## Leucothrix                   0.65893
## Flavobacteriaceae_uncultured 0.59254
pdf(paste0(wdir, "fig/network-spring-genus-11hr-16s.pdf"), width = 30, height = 15)
p <- plot(props_spring_genus.11,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.01,
          nodeColor = "cluster",
          colorVec =  c("#804000", "#56B4E9", "#009960", "#999999", "#357090", "#CC79A7", "#E69F00"),
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          #title1 = "Network on genus level with Pearson correlations",
          showTitle = FALSE,
          #cexTitle = 2.3,
          posCol = "#0072B2",
          negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
##       75% 
## 0.3308134
knitr::kable(as.matrix(sort(colSums(net_spring_genus.11$normCounts1), decreasing = TRUE)[1:20]))
Marivita 93758.648
Aureispira 25102.082
Methylophaga 17800.597
Meridianimaribacter 5905.413
Winogradskyella 5088.945
Methylomicrobium 4118.112
Leucothrix 3891.838
Lutimaribacter 3275.711
Lewinella 3131.341
Psychroserpens 3079.585
Taeseokella 2880.727
Cellulophaga 2872.356
Flavobacterium 2466.225
Cyanobium_PCC-6307 2459.497
Rhodobacteraceae_uncultured 2316.024
Marinovum 2315.527
Rhodobacteraceae 2002.619
Hoeflea 1963.873
Methylobacter 1924.185
Phaeobacter 1905.210

Pearson’s correlation

#While with the “signed” transformation, positive correlated taxa are likely to belong to the same cluster, with the “unsigned” transformation clusters contain strongly positive and negative correlated taxa.
net_pears <- netConstruct(biom.16s,  
                          filtTax = "highestFreq",
                          filtTaxPar = list(highestFreq = 50),
                          measure = "pearson",
                          normMethod = "clr",
                          zeroMethod = "multRepl",
                          sparsMethod = "threshold",
                          thresh = 0.3,
                          dissFunc = "signed",
                          verbose = 3)
## Checking input arguments ... Done.
## Data filtering ...
## 928 taxa removed.
## 50 taxa and 24 samples remaining.
## 
## Zero treatment:
## Execute multRepl() ... Done.
## 
## Normalization:
## Execute clr(){SpiecEasi} ... Done.
## 
## Calculate 'pearson' associations ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_pears$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_pears_unsigned <- netConstruct(data = net_pears$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           thresh = 0.3,
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_pears_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_pears
props_pears <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE
                          )

#?summary.microNetProps
print(summary(props_pears, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.78484
## Modularity                0.04831
## Positive edge percentage 44.33962
## Edge density              0.69224
## Natural connectivity      0.19868
## Vertex connectivity      13.00000
## Edge connectivity        13.00000
## Average dissimilarity*    0.77277
## Average path length**     0.99918
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##               
## name:  1  2  3
##    #: 14 20 16
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````       
##  154281
##  193874
##  91970 
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##          
## 154281 43
## 193874 43
## 91970  42
## 205285 42
## 154150 41
## 
## Betweenness centrality (normalized):
##               
## 431434 0.03316
## 193874 0.03316
## 391654 0.02211
## 332216 0.02126
## 154281 0.01956
## 
## Closeness centrality (normalized):
##               
## 154360 1.50977
## 193874 1.50318
## 205285 1.49145
## 91970  1.46304
## 154281 1.45782
## 
## Eigenvector centrality (normalized):
##               
## 193874 1.00000
## 154281 0.99297
## 91970  0.98020
## 205285 0.97734
## 154360 0.96529
pdf(paste0(wdir, "fig/network-pearson-16s.pdf"), width = 30, height = 15)
p <- plot(props_pears,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.001,
          nodeColor = "cluster",
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          title1 = "Network on species level with Pearson correlations",
          showTitle = TRUE,
          cexTitle = 2.3)
legend(0.57, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#009900","red"), 
       bty = "n", horiz = FALSE)
dev.off()

#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
biom.16s.genus.top50 <- prune_taxa(names(sort(taxa_sums(biom.16s.genus), decreasing = TRUE)[1:50]), biom.16s.genus)
net_pears_genus <- netConstruct(biom.16s.genus.top50,
                          taxRank = "Genus",
                          measure = "pearson",
                          zeroMethod = "multRepl",
                          normMethod = "clr",
                          sparsMethod = "threshold",
                          thresh = 0.3,
                          verbose = 3)
## Checking input arguments ... Done.
## 50 taxa and 24 samples remaining.
## 
## Zero treatment:
## Execute multRepl() ... Done.
## 
## Normalization:
## Execute clr(){SpiecEasi} ... Done.
## 
## Calculate 'pearson' associations ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_pears_genus$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_pears_genus_unsigned <- netConstruct(data = net_pears_genus$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           thresh = 0.3,
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_pears_genus_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_pears_genus
props_pears_genus <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_pears_genus, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.70118
## Modularity                0.06109
## Positive edge percentage 46.16420
## Edge density              0.60653
## Natural connectivity      0.15972
## Vertex connectivity      11.00000
## Edge connectivity        11.00000
## Average dissimilarity*    0.80366
## Average path length**     1.00555
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##               
## name:  1  2  3
##    #: 17 18 15
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````                  
##  Lentibacter      
##  RS62_marine_group
##  Sulfurovum       
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##                     
## Lentibacter       42
## Pseudarcobacter   38
## RS62_marine_group 38
## Sulfurovum        37
## Tropicibacter     36
## 
## Betweenness centrality (normalized):
##                           
## Marinovum          0.02721
## Lentibacter        0.02551
## Marivivens         0.02296
## Cyanobium_PCC-6307 0.01956
## Lewinella          0.01616
## 
## Closeness centrality (normalized):
##                         
## Sulfurovum       1.42923
## NS5_marine_group 1.40426
## Lentibacter      1.36705
## Methylobacter    1.36133
## Tropicibacter    1.33878
## 
## Eigenvector centrality (normalized):
##                          
## Lentibacter       1.00000
## Sulfurovum        0.97226
## RS62_marine_group 0.92932
## NS5_marine_group  0.91158
## Pseudarcobacter   0.90952
pdf(paste0(wdir, "fig/network-pearson-genus-16s.pdf"), width = 30, height = 15)
p <- plot(props_pears_genus,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.5,
          nodeColor = "cluster",
          colorVec =  c("#804000", "#357090", "#009960"),
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          #title1 = "Network on genus level with Pearson correlations",
          showTitle = FALSE,
          #cexTitle = 2.3)
          posCol = "#0072B2",
          negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## [1] 0.5890202
knitr::kable(as.matrix(sort(colSums(net_pears_genus$normCounts1), decreasing = TRUE)[1:20]))
Marivita 75.932138
Lentibacter 46.134143
Flavobacterium 34.910881
Lewinella 28.127237
NS3a_marine_group 27.840424
Methylophaga 27.692529
Aureispira 24.007935
Lutimaribacter 18.709552
Geitlerinema_PCC-7105 12.684909
Cyanobium_PCC-6307 12.209718
Saprospiraceae_uncultured 9.137318
Winogradskyella 8.136879
Methylomicrobium 8.059228
Psychroserpens 6.897760
Pseudorhodobacter 5.296707
Cryomorphaceae_uncultured 4.584473
Marivivens 4.465113
Planktomarina 3.711412
Taeseokella 2.791422
Hydrogenophaga 1.265531

Compare genera that were significant in differential abundance analysis

#lightVSocean: 11
my.genus.1 <- res_sig.aldex.lightVSocean %>% dplyr::select(Genus) %>% as.data.frame()
my.genus.1$lightVSocean <- 1
#lightVSdark: 21
my.genus.2 <- tax_table(biom.16s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% row.names(),] %>% as.data.frame() %>% dplyr::select(Genus)
my.genus.2$lightVSdark <- 1
#light.time.trend: 21
my.genus.3 <- dge.5.u %>% dplyr::select(Genus) %>% as.data.frame
my.genus.3$light.time.trend <- 1
#merge
my.genus <- merge(x = my.genus.1, y = my.genus.2, by = "Genus", all = TRUE)
my.genus <- merge(x = my.genus, y = my.genus.3, by = "Genus", all = TRUE)

biom.16s.genus.1 <- biom.16s.genus
sample_data(biom.16s.genus.1) <- sample_data(biom.16s.genus) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% unite(culture_setup_time, c("Culture_setup", "Time_point"))
temp.genus <- merge_samples(biom.16s.genus.1, "culture_setup_time", fun=sum) %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt()
temp.abund <- data.frame()
for(i in 1:length(my.genus$Genus)) {
  temp.out <- temp.genus %>% dplyr::filter(Genus %in% my.genus$Genus[i]) %>% dplyr::select(Sample,Abundance,Genus)
  temp.abund <- rbind(temp.abund, temp.out)
}
my.genus.abund <- reshape2::acast(temp.abund, Genus~Sample, value.var = "Abundance")[,c(11,6,7,4,5,9,10,8,2,3,1)] %>% as.data.frame() %>% tibble::rownames_to_column(var = "Genus")
my.genus.light.16s <- merge(x = my.genus, y = my.genus.abund, by = "Genus", all = TRUE)
knitr::kable(as.matrix(my.genus.light.16s))
Genus lightVSocean lightVSdark light.time.trend ocean_0 light+clean_gas_5 light+clean_gas_7 light+clean_gas_11 light+clean_gas_14 light+used_gas_5 light+used_gas_7 light+used_gas_11 dark+clean_gas_5 dark+clean_gas_7 dark+clean_gas_11
Ahrensia NA NA 1 0.000000e+00 0.0000000000 0.0002755937 0.0012485268 0.000000000 0.0000000000 0.0000000000 0.0017333274 0.0000000000 0.000000000 0.0000000000
Aphanizomenon_NIES81 NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0024296440 0.000000000 0.0000000000 0.0000000000 0.0028496620 0.0000000000 0.000000000 0.0000000000
Aureispira 1 1 NA 6.808685e-03 0.0212254783 0.3910692863 0.4739771827 0.000000000 0.0176424624 0.4763975828 0.2968543447 0.0000000000 0.004895320 0.0000000000
Cohaesibacter NA NA 1 0.000000e+00 0.0000000000 0.0006749333 0.0030096130 0.000000000 0.0000000000 0.0000000000 0.0023237684 0.0000000000 0.000000000 0.0003355920
Congregibacter NA 1 NA 0.000000e+00 0.0000000000 0.0002322213 0.0011860258 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000
Cyanobium_PCC-6307 NA 1 NA 2.090557e-02 0.0042945492 0.0049360049 0.0652519415 0.042124248 0.0040058679 0.0026274961 0.0111591612 0.0000000000 0.000000000 0.0001446189
Cyclobacterium NA NA 1 8.361058e-05 0.0000000000 0.0008338526 0.0090586584 0.010778457 0.0000000000 0.0000000000 0.0064543753 0.0000000000 0.000000000 0.0018638068
Denitromonas NA NA 1 0.000000e+00 0.0000000000 0.0009830893 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0043392431 0.0019635142 0.000000000 0.0015089648
Devosia NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0004476291 0.000000000 0.0000000000 0.0000000000 0.0011458209 0.0000000000 0.000000000 0.0002455133
Flexithrix NA 1 NA 0.000000e+00 0.0036042295 0.0032408892 0.0000000000 0.000000000 0.0005576115 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000
Geitlerinema_PCC-7105 1 1 NA 2.856894e-03 0.4369261401 0.0466434451 0.0156534819 0.013997676 0.3475910717 0.0300407531 0.0179048376 0.0006570818 0.000000000 0.0000000000
Geminocystis_PCC-6308 NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0020425959 0.000000000 0.0000000000 0.0000000000 0.0010466521 0.0000000000 0.000000000 0.0000000000
Gimesia NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0047030673 0.000000000 0.0000000000 0.0000000000 0.0033184462 0.0000000000 0.000000000 0.0000000000
Hoeflea NA NA 1 6.799129e-04 0.0015239639 0.0042782162 0.0192128650 0.064235061 0.0014487109 0.0021087878 0.0207822045 0.0018202109 0.002825818 0.0040161491
Labrenzia NA NA 1 1.552768e-04 0.0000000000 0.0017825184 0.0030800261 0.000000000 0.0000000000 0.0017992925 0.0040907596 0.0008018876 0.000000000 0.0027660224
Leptolyngbya_LEGE-06070 1 1 NA 2.216495e-04 0.0279353024 0.0049286984 0.0022635154 0.000000000 0.0285922643 0.0040799291 0.0013624441 0.0000000000 0.000000000 0.0000000000
Marivita NA NA 1 1.466339e-02 0.0976943417 0.1303832950 0.2146588184 0.299145251 0.1089507411 0.1430977866 0.4013861934 0.0308147473 0.079643158 0.4551946777
Membranicola NA NA 1 0.000000e+00 0.0000000000 0.0001763800 0.0019654980 0.000000000 0.0000000000 0.0005459890 0.0034007996 0.0000000000 0.000000000 0.0015698152
Methylomicrobium NA NA 1 4.107774e-03 0.0000000000 0.0003282668 0.0012333458 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.1986096679 0.250173474 0.0253570848
Methylophaga NA NA 1 3.379498e-03 0.0007062692 0.0050298790 0.0084394570 0.082254218 0.0008083664 0.0012557918 0.0035625787 0.1631236464 0.129891161 0.1082383874
Nautella NA NA 1 0.000000e+00 0.0000000000 0.0002162934 0.0000000000 0.000000000 0.0000000000 0.0008171678 0.0013259973 0.0000000000 0.000000000 0.0028750247
Nioella NA NA 1 4.563681e-04 0.0007750110 0.0032633822 0.0057504545 0.024132413 0.0017131879 0.0016280435 0.0046061824 0.0037895812 0.003820766 0.0035431194
NS2b_marine_group NA 1 NA 0.000000e+00 0.0000000000 0.0022541692 0.0003727375 0.021066518 0.0006067598 0.0016204349 0.0003967816 0.0000000000 0.000000000 0.0000000000
Porphyrobacter 1 1 1 0.000000e+00 0.0000000000 0.0001899139 0.0023693512 0.000000000 0.0000000000 0.0012566816 0.0000000000 0.0000000000 0.000000000 0.0000000000
Pseudohoeflea NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0010752881 0.000000000 0.0000000000 0.0000000000 0.0007783249 0.0000000000 0.000000000 0.0001511979
Pseudorhodobacter 1 NA NA 9.518934e-04 0.0073002743 0.0235046912 0.0081642848 0.021447560 0.0082964378 0.0345157831 0.0068608816 0.0031892845 0.003693128 0.0021981458
Rheinheimera NA NA 1 2.114121e-04 0.0000000000 0.0005473819 0.0012469665 0.011358610 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0001135262
Schizothrix_LEGE_07164 1 1 NA 0.000000e+00 0.0007606409 0.0009768852 0.0000000000 0.000000000 0.0011680427 0.0005776241 0.0000000000 0.0000000000 0.000000000 0.0000000000
Symphothece_PCC-7002 1 1 NA 4.971324e-04 0.0558599164 0.0226521751 0.0023310233 0.000000000 0.0576151105 0.0084967991 0.0059078838 0.0000000000 0.000000000 0.0000000000
Synechocystis_PCC-6803 1 1 1 0.000000e+00 0.0000000000 0.0000000000 0.0015656554 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000
Taeseokella NA NA 1 4.677293e-04 0.0026763916 0.0047277154 0.0328121404 0.009217898 0.0011426820 0.0070170485 0.0367128905 0.0011930301 0.004288548 0.0036569506
Thalassospira 1 1 NA 7.347337e-04 0.0020072938 0.0471862906 0.0134617628 0.000000000 0.0029291942 0.0678548179 0.0310070252 0.0000000000 0.000000000 0.0000000000
#darkVSocean: 17
my.genus.4 <- res_sig.aldex.darkVSocean %>% dplyr::select(Genus) %>% as.data.frame()
my.genus.4$darkVSocean <- 1
#darkVSlight: 51
my.genus.5 <- tax_table(biom.16s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% row.names(),] %>% as.data.frame() %>% dplyr::select(Genus)
my.genus.5$darkVSlight <- 1
#dark.time.trend: 49
my.genus.6 <- dge.6.u %>% dplyr::select(Genus) %>% as.data.frame
my.genus.6$dark.time.trend <- 1
#merge
my.genus <- merge(x = my.genus.4, y = my.genus.5, by = "Genus", all = TRUE)
my.genus <- merge(x = my.genus, y = my.genus.6, by = "Genus", all = TRUE)

temp.abund <- data.frame()
for(i in 1:length(my.genus$Genus)) {
  temp.out <- temp.genus %>% dplyr::filter(Genus %in% my.genus$Genus[i]) %>% dplyr::select(Sample,Abundance,Genus)
  temp.abund <- rbind(temp.abund, temp.out)
}
my.genus.abund <- reshape2::acast(temp.abund, Genus~Sample, value.var = "Abundance")[,c(11,6,7,4,5,9,10,8,2,3,1)] %>% as.data.frame() %>% tibble::rownames_to_column(var = "Genus")
my.genus.dark.16s <- merge(x = my.genus, y = my.genus.abund, by = "Genus", all = TRUE)
knitr::kable(as.matrix(my.genus.dark.16s))
Genus darkVSocean darkVSlight dark.time.trend ocean_0 light+clean_gas_5 light+clean_gas_7 light+clean_gas_11 light+clean_gas_14 light+used_gas_5 light+used_gas_7 light+used_gas_11 dark+clean_gas_5 dark+clean_gas_7 dark+clean_gas_11
Aequorivita 1 1 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 1.832269e-03
Albimonas NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0003998426 0.0000000000 0.0018285727 0.001345524 2.944809e-03
Alcanivorax 1 NA NA 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0038442545 0.002124497 2.011676e-03
Antarctobacter 1 1 1 8.116697e-05 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0028121981 0.000000000 2.009017e-03
Aquibacter NA 1 NA 3.156040e-03 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0019088860 0.000000000 4.895079e-04
Arenibacter NA NA 1 1.163180e-03 0.0000000000 0.0012916132 0.0000000000 0.020757124 0.0000000000 0.0000000000 0.0000000000 0.0011389587 0.003186910 1.095420e-02
Bizionia NA NA 1 5.079168e-04 0.0000000000 0.0000000000 0.0000000000 0.005515630 0.0000000000 0.0000000000 0.0010516700 0.0005828828 0.004748518 4.758394e-03
Candidatus_Megaira NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0005704324 0.000000000 1.978612e-03
Cellulophaga NA NA 1 1.075482e-03 0.0007806143 0.0011316863 0.0004697176 0.000000000 0.0004173870 0.0013700723 0.0014665556 0.0036618264 0.010797998 1.744993e-02
Cohaesibacter NA NA 1 0.000000e+00 0.0000000000 0.0006749333 0.0030096130 0.000000000 0.0000000000 0.0000000000 0.0023237684 0.0000000000 0.000000000 3.355920e-04
Cyclobacterium NA NA 1 8.361058e-05 0.0000000000 0.0008338526 0.0090586584 0.010778457 0.0000000000 0.0000000000 0.0064543753 0.0000000000 0.000000000 1.863807e-03
Denitromonas NA NA 1 0.000000e+00 0.0000000000 0.0009830893 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0043392431 0.0019635142 0.000000000 1.508965e-03
Flavobacteriaceae 1 1 1 1.465568e-03 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0022198774 0.005559746 4.333731e-03
Hoeflea NA NA 1 6.799129e-04 0.0015239639 0.0042782162 0.0192128650 0.064235061 0.0014487109 0.0021087878 0.0207822045 0.0018202109 0.002825818 4.016149e-03
Labrenzia NA NA 1 1.552768e-04 0.0000000000 0.0017825184 0.0030800261 0.000000000 0.0000000000 0.0017992925 0.0040907596 0.0008018876 0.000000000 2.766022e-03
Legionellaceae_uncultured NA NA 1 0.000000e+00 0.0027012178 0.0028538941 0.0000000000 0.000000000 0.0009356290 0.0046546499 0.0000000000 0.0002673780 0.000000000 4.439936e-03
Leucothrix 1 1 1 2.672806e-03 0.0008177871 0.0000000000 0.0000000000 0.000000000 0.0006132225 0.0000000000 0.0000000000 0.0064480522 0.012109591 2.419180e-02
Marinicella NA NA 1 2.056224e-04 0.0000000000 0.0003129874 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0008715804 0.003337973 5.296964e-03
Mariniflexile 1 1 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0006591556 0.002593059 3.042980e-03
Marinovum NA NA 1 5.102455e-04 0.0012782168 0.0031034346 0.0037769213 0.000000000 0.0008687154 0.0044648891 0.0042497784 0.0115157462 0.007952507 1.274805e-02
Marivita NA NA 1 1.466339e-02 0.0976943417 0.1303832950 0.2146588184 0.299145251 0.1089507411 0.1430977866 0.4013861934 0.0308147473 0.079643158 4.551947e-01
Membranicola NA NA 1 0.000000e+00 0.0000000000 0.0001763800 0.0019654980 0.000000000 0.0000000000 0.0005459890 0.0034007996 0.0000000000 0.000000000 1.569815e-03
Meridianimaribacter 1 1 1 7.487941e-04 0.0013691579 0.0005527849 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0029245680 0.019121129 3.670825e-02
Mesoflavibacter 1 NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.007918675 3.666090e-03
Methylobacter 1 1 NA 5.468674e-04 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0438901766 0.040019563 1.196080e-02
Methylomicrobium 1 1 NA 4.107774e-03 0.0000000000 0.0003282668 0.0012333458 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.1986096679 0.250173474 2.535708e-02
Methylomonadaceae_uncultured 1 1 NA 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0082250754 0.013203332 5.923953e-04
Methylomonas 1 1 NA 0.000000e+00 0.0000000000 0.0017159768 0.0006856936 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0150801648 0.015066388 2.634419e-03
Methylophaga 1 1 NA 3.379498e-03 0.0007062692 0.0050298790 0.0084394570 0.082254218 0.0008083664 0.0012557918 0.0035625787 0.1631236464 0.129891161 1.082384e-01
Methylosarcina 1 1 NA 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0053555983 0.005473982 7.487388e-05
Muricauda NA NA 1 0.000000e+00 0.0000000000 0.0014466318 0.0000000000 0.016041084 0.0000000000 0.0005947820 0.0000000000 0.0000000000 0.000000000 1.619021e-03
Nannocystaceae_uncultured 1 1 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0006582078 0.001632063 1.110616e-03
Nannocystis NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 7.748460e-04
Nautella NA NA 1 0.000000e+00 0.0000000000 0.0002162934 0.0000000000 0.000000000 0.0000000000 0.0008171678 0.0013259973 0.0000000000 0.000000000 2.875025e-03
Neptunomonas NA NA 1 1.353942e-04 0.0000000000 0.0008513155 0.0005442763 0.000000000 0.0017000879 0.0043217679 0.0106348511 0.0006595328 0.001338421 1.644501e-03
Nioella NA NA 1 4.563681e-04 0.0007750110 0.0032633822 0.0057504545 0.024132413 0.0017131879 0.0016280435 0.0046061824 0.0037895812 0.003820766 3.543119e-03
Oceanobacterium NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0006021070 0.000000000 1.341588e-03
Oricola NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0005442818 0.000000000 0.0000000000 0.0000000000 0.0012449576 0.0000000000 0.000000000 3.357265e-04
Phaeobacter NA NA 1 2.009532e-04 0.0000000000 0.0004764591 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0027059497 0.000000000 1.184285e-02
Primorskyibacter NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0005834410 0.000000000 1.903747e-03
Pseudohoeflea NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0010752881 0.000000000 0.0000000000 0.0000000000 0.0007783249 0.0000000000 0.000000000 1.511979e-04
Psychroserpens NA NA 1 1.392088e-03 0.0066090015 0.0055332905 0.0013989326 0.005940084 0.0040755943 0.0059378838 0.0020921003 0.0029410488 0.010420519 1.842288e-02
Rhodobacteraceae NA NA 1 5.648481e-04 0.0000000000 0.0022997705 0.0010142864 0.006512690 0.0007569106 0.0044767990 0.0041107715 0.0055404839 0.000000000 1.137303e-02
Rhodobacteraceae_uncultured NA NA 1 6.301788e-03 0.0000000000 0.0017093934 0.0015060674 0.000000000 0.0000000000 0.0003141622 0.0003524818 0.0071847670 0.000000000 1.402671e-02
Roseobacter NA NA 1 3.498325e-04 0.0000000000 0.0006497742 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0015646925 0.003364529 1.785765e-03
Ruegeria NA NA 1 0.000000e+00 0.0000000000 0.0004328169 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0007281610 0.000000000 4.203980e-03
Sagittula NA NA 1 1.786031e-04 0.0000000000 0.0008082698 0.0010889112 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0029867921 0.005385985 3.560316e-03
Schleiferia NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 6.434068e-04
Sedimentitalea 1 NA 1 0.000000e+00 0.0000000000 0.0002101179 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0011499838 0.000000000 9.641517e-04
SM1A02 NA NA 1 0.000000e+00 0.0000000000 0.0006803159 0.0000000000 0.016875810 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 1.204017e-03
Taeseokella NA NA 1 4.677293e-04 0.0026763916 0.0047277154 0.0328121404 0.009217898 0.0011426820 0.0070170485 0.0367128905 0.0011930301 0.004288548 3.656951e-03
Tenacibaculum NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0003503334 0.002321306 1.059759e-03
Tropicibacter 1 1 NA 6.143363e-04 0.0000000000 0.0001705410 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0124296965 0.010003630 5.452494e-03
Vitellibacter NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 8.207555e-04
Winogradskyella NA NA 1 2.137090e-03 0.0012266796 0.0232761781 0.0005511586 0.012842383 0.0008125185 0.0043722932 0.0013221244 0.0030721625 0.017461028 3.124321e-02
Xanthomarina NA NA 1 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 4.259341e-04
Youngimonas 1 1 NA 1.522092e-04 0.0000000000 0.0000000000 0.0000000000 0.000000000 0.0000000000 0.0000000000 0.0000000000 0.0111975727 0.004940597 4.408582e-03

List of all genera with relative abundances per culture setup and time point

biom.16s.genus.1 <- biom.16s.genus
sample_data(biom.16s.genus.1) <- sample_data(biom.16s.genus) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% unite(culture_setup_time, c("Culture_setup", "Time_point"))
temp.genus <- merge_samples(biom.16s.genus.1, "culture_setup_time", fun=sum) %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt()
temp.abund.all <- temp.genus %>% dplyr::select(Sample,Abundance,Genus)
rel.abund.16s <- reshape2::acast(temp.abund.all, Genus~Sample, value.var = "Abundance")
write.table(as.matrix(rel.abund.16s), file = paste0(wdir, "tab-rel-abund-culture-time-16s.csv"), col.names = T, row.names = T, sep = ",", quote = F)

Relative abundance of selected genera

#relative abundance of sequences from methanotrophic bacteria (27--33\%) in the middle of the experiment (day 5--7)
sample_data(biom.16s)$culture_setup_time_point <- mapply(paste0, as.character(get_variable(biom.16s, "Culture_setup")), as.character(get_variable(biom.16s, "Time_point")), collapse = "_")
temp.mob <- merge_samples(biom.16s, "culture_setup_time_point") %>% tax_glom(taxrank = "Phylum") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Phylum %in% "Methanotroph") %>% dplyr::select(Sample, Abundance, Phylum)

#At the end of the dark incubation, other chemoheterotrophic microbes (especially the Proteobacteria \textit{Marivita} and \textit{Methylophaga}) had outcompeted the methanotrophs.
temp.dark.11 <- merge_samples(biom.16s, "culture_setup_time_point") %>% tax_glom(taxrank = "Genus") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Sample %in% "dark+clean_gas11" & Abundance>0.01) %>% dplyr::select(Sample, Abundance, Phylum, Genus)

temp.cyano <- merge_samples(biom.16s, "culture_setup_time_point") %>% tax_glom(taxrank = "Phylum") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Phylum %in% "Cyanobacteria") %>% dplyr::select(Sample, Abundance, Phylum)

temp.light.5 <- merge_samples(biom.16s, "culture_setup_time_point") %>% tax_glom(taxrank = "Genus") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Sample %in% "light+clean_gas5" & Abundance>0.01) %>% dplyr::select(Sample, Abundance, Phylum, Genus)

temp.light.11 <- merge_samples(biom.16s, "culture_setup_time_point") %>% tax_glom(taxrank = "Genus") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Sample %in% "light+clean_gas11" & Abundance>0.01) %>% dplyr::select(Sample, Abundance, Phylum, Genus)

my.genus <- c(
  "Geitlerinema_PCC-7105",
  "Methylomicrobium",
  "Methylobacter",
  "Methylomonas",
  "Marivivens",
  "Winogradskyella",
  "Symphothece_PCC-7002",
  "Cyanobium_PCC-6307",
  "Alteromonas"
)
temp.genus <- merge_samples(biom.16s, "Culture_setup") %>% tax_glom(taxrank = "Genus") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt()
temp.abund <- data.frame()
for(i in 1:length(my.genus)) {
  temp.out <- temp.genus %>% filter(Genus==my.genus[i]) %>%dplyr::select(Sample, Abundance, Genus)
  temp.abund <- rbind(temp.abund, temp.out)
}
knitr::kable(as.matrix(reshape2::acast(temp.abund, Genus~Sample, value.var = "Abundance")))
dark+clean_gas light+clean_gas light+used_gas ocean
Alteromonas 0.0000000 0.0008821 0.0067583 0.0001008
Cyanobium_PCC-6307 0.0001001 0.0179715 0.0058653 0.0209056
Geitlerinema_PCC-7105 0.0001616 0.1087390 0.1348442 0.0028569
Marivivens 0.0276675 0.0002971 0.0001893 0.0189411
Methylobacter 0.0215455 0.0000000 0.0000000 0.0005469
Methylomicrobium 0.0818416 0.0004492 0.0000000 0.0041078
Methylomonas 0.0064627 0.0011652 0.0000000 0.0000000
Symphothece_PCC-7002 0.0000000 0.0238959 0.0244769 0.0004971
Winogradskyella 0.0234637 0.0144901 0.0021645 0.0021371
#save workspace
save.image(paste0(devdir, "/phyloseq-16s.Rdata"))

session information

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] splines   stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] NetCoMi_1.1.0               SpiecEasi_1.1.3            
##  [3] DirichletMultinomial_1.46.0 ALDEx2_1.36.0              
##  [5] latticeExtra_0.6-30         zCompositions_1.5.0-3      
##  [7] truncnorm_1.0-9             NADA_1.6-1.1               
##  [9] survival_3.6-4              MASS_7.3-60.2              
## [11] microbiome_1.26.0           factoextra_1.0.7           
## [13] FactoMineR_2.11             ggbiplot_0.6.2             
## [15] knitr_1.47                  ggpubr_0.6.0               
## [17] patchwork_1.2.0             gplots_3.1.3.1             
## [19] edgeR_4.2.0                 limma_3.60.2               
## [21] DESeq2_1.44.0               SummarizedExperiment_1.34.0
## [23] Biobase_2.64.0              MatrixGenerics_1.16.0      
## [25] matrixStats_1.3.0           GenomicRanges_1.56.0       
## [27] GenomeInfoDb_1.40.1         IRanges_2.38.0             
## [29] S4Vectors_0.42.0            BiocGenerics_0.50.0        
## [31] vegan_2.6-6.1               lattice_0.22-6             
## [33] permute_0.9-7               ggrepel_0.9.5              
## [35] RColorBrewer_1.1-3          psadd_0.1.3                
## [37] reshape2_1.4.4              phyloseq_1.48.0            
## [39] lubridate_1.9.3             forcats_1.0.0              
## [41] stringr_1.5.1               dplyr_1.1.4                
## [43] purrr_1.0.2                 readr_2.1.5                
## [45] tidyr_1.3.1                 tibble_3.2.1               
## [47] tidyverse_2.0.0             ggplot2_3.5.1              
## 
## loaded via a namespace (and not attached):
##   [1] nnet_7.3-19             DT_0.33                 Biostrings_2.72.1      
##   [4] TH.data_1.1-2           vctrs_0.6.5             digest_0.6.35          
##   [7] png_0.1-8               corpcor_1.6.10          shape_1.4.6.1          
##  [10] pcaPP_2.0-4             registry_0.5-1          corrplot_0.92          
##  [13] deldir_2.0-4            parallelly_1.37.1       SPRING_1.0.4           
##  [16] foreach_1.5.2           withr_3.0.0             psych_2.4.3            
##  [19] xfun_0.44               doRNG_1.8.6             memoise_2.0.1          
##  [22] emmeans_1.10.2          latentcor_2.0.1         zoo_1.8-12             
##  [25] gtools_3.9.5            pbapply_1.7-2           Formula_1.2-5          
##  [28] KEGGREST_1.44.0         scatterplot3d_0.3-44    httr_1.4.7             
##  [31] rstatix_0.7.2           globals_0.16.3          rhdf5filters_1.16.0    
##  [34] rhdf5_2.48.0            rstudioapi_0.16.0       UCSC.utils_1.0.0       
##  [37] generics_0.1.3          base64enc_0.1-3         zlibbioc_1.50.0        
##  [40] ca_0.71.1               doSNOW_1.0.20           RcppZiggurat_0.1.6     
##  [43] GenomeInfoDbData_1.2.12 quadprog_1.5-8          SparseArray_1.4.8      
##  [46] xtable_1.8-4            ade4_1.7-22             doParallel_1.0.17      
##  [49] evaluate_0.23           S4Arrays_1.4.1          Rfast_2.1.0            
##  [52] preprocessCore_1.66.0   hms_1.1.3               glmnet_4.1-8           
##  [55] pulsar_0.3.11           irlba_2.3.5.1           colorspace_2.1-0       
##  [58] magrittr_2.0.3          viridis_0.6.5           future.apply_1.11.2    
##  [61] cowplot_1.1.3           Hmisc_5.1-3             pillar_1.9.0           
##  [64] nlme_3.1-164            huge_1.3.5              iterators_1.0.14       
##  [67] caTools_1.18.2          compiler_4.4.1          stringi_1.8.4          
##  [70] biomformat_1.32.0       TSP_1.2-4               dendextend_1.17.1      
##  [73] plyr_1.8.9              crayon_1.5.2            abind_1.4-5            
##  [76] timeSeries_4032.109     sn_2.1.1                locfit_1.5-9.9         
##  [79] bit_4.0.5               rootSolve_1.8.2.4       mixedCCA_1.6.2         
##  [82] sandwich_3.1-0          fastcluster_1.2.6       codetools_0.2-20       
##  [85] multcomp_1.4-25         directlabels_2024.1.21  bslib_0.7.0            
##  [88] plotly_4.10.4           multtest_2.60.0         Rcpp_1.0.12            
##  [91] qgraph_1.9.8            magic_1.6-1             interp_1.1-6           
##  [94] leaps_3.1               blob_1.2.4              utf8_1.2.4             
##  [97] fBasics_4032.96         pbivnorm_0.6.0          listenv_0.9.1          
## [100] checkmate_2.3.1         Rdpack_2.6              ggsignif_0.6.4         
## [103] estimability_1.5.1      lavaan_0.6-17           Matrix_1.7-0           
## [106] statmod_1.5.0           tzdb_0.4.0              pkgconfig_2.0.3        
## [109] tools_4.4.1             cachem_1.1.0            rbibutils_2.2.16       
## [112] RSQLite_2.3.7           viridisLite_0.4.2       DBI_1.2.3              
## [115] numDeriv_2016.8-1.1     impute_1.78.0           fastmap_1.2.0          
## [118] rmarkdown_2.27          scales_1.3.0            grid_4.4.1             
## [121] fMultivar_4031.84       broom_1.0.6             sass_0.4.9             
## [124] coda_0.19-4.1           carData_3.0-5           rpart_4.1.23           
## [127] snow_0.4-4              farver_2.1.2            mgcv_1.9-1             
## [130] yaml_2.3.8              VGAM_1.1-11             spatial_7.3-17         
## [133] foreign_0.8-86          cli_3.6.2               webshot_0.5.5          
## [136] lifecycle_1.0.4         mvtnorm_1.2-5           backports_1.5.0        
## [139] BiocParallel_1.38.0     timechange_0.3.0        gtable_0.3.5           
## [142] parallel_4.4.1          ape_5.8                 jsonlite_1.8.8         
## [145] seriation_1.5.5         bitops_1.0-7            multcompView_0.1-10    
## [148] bit64_4.0.5             assertthat_0.2.1        Rtsne_0.17             
## [151] glasso_1.11             doFuture_1.0.1          heatmaply_1.5.0        
## [154] RcppParallel_5.1.7      jquerylib_0.1.4         highr_0.11             
## [157] timeDate_4032.109       orca_1.1-2              lazyeval_0.2.2         
## [160] dynamicTreeCut_1.63-1   htmltools_0.5.8.1       GO.db_3.19.1           
## [163] glue_1.7.0              XVector_0.44.0          microbenchmark_1.4.10  
## [166] mnormt_2.1.1            jpeg_0.1-10             gridExtra_2.3          
## [169] flashClust_1.01-2       igraph_2.0.3            R6_2.5.1               
## [172] fdrtool_1.2.17          labeling_0.4.3          cluster_2.1.6          
## [175] rngtools_1.5.2          Rhdf5lib_1.26.0         DelayedArray_0.30.1    
## [178] tidyselect_1.2.1        plotrix_3.8-4           WGCNA_1.72-5           
## [181] htmlTable_2.4.2         car_3.1-2               AnnotationDbi_1.66.0   
## [184] future_1.33.2           filematrix_1.3          munsell_0.5.1          
## [187] KernSmooth_2.23-24      geometry_0.4.7          data.table_1.15.4      
## [190] htmlwidgets_1.6.4       rlang_1.1.3             fansi_1.0.6